1 //===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// This pass implements the well-known scalar replacement of aggregates
11 /// (SROA) transformation. It tries to identify promotable elements of an
12 /// aggregate alloca, and promote them to registers. It will also try to
13 /// convert uses of an element (or set of elements) of an alloca into a vector
14 /// or bitfield-style integer scalar if appropriate.
15 ///
16 /// It works to do this with minimal slicing of the alloca so that regions
17 /// which are merely transferred in and out of external memory remain unchanged
18 /// and are not decomposed to scalar code.
19 ///
20 /// Because this also performs alloca promotion, it can be thought of as also
21 /// serving the purpose of SSA formation. The algorithm iterates on the
22 /// function until all opportunities for promotion have been realized.
23 ///
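/// As a rough illustration (the IR names below are invented for the example):
/// given
///
///   %agg = alloca { i32, i32 }
///
/// where each field is only ever loaded and stored individually, the alloca
/// can be replaced by two scalar allocas that are then promoted to SSA
/// values, while an alloca that is only ever copied in and out via memcpy is
/// left as a single memory object.
///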
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/SROA.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/Analysis/AssumptionCache.h"
31 #include "llvm/Analysis/GlobalsModRef.h"
32 #include "llvm/Analysis/Loads.h"
33 #include "llvm/Analysis/PtrUseVisitor.h"
34 #include "llvm/Analysis/ValueTracking.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DIBuilder.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DebugInfo.h"
39 #include "llvm/IR/DerivedTypes.h"
40 #include "llvm/IR/IRBuilder.h"
41 #include "llvm/IR/InstVisitor.h"
42 #include "llvm/IR/Instructions.h"
43 #include "llvm/IR/IntrinsicInst.h"
44 #include "llvm/IR/LLVMContext.h"
45 #include "llvm/IR/Operator.h"
46 #include "llvm/Pass.h"
47 #include "llvm/Support/Chrono.h"
48 #include "llvm/Support/CommandLine.h"
49 #include "llvm/Support/Compiler.h"
50 #include "llvm/Support/Debug.h"
51 #include "llvm/Support/ErrorHandling.h"
52 #include "llvm/Support/MathExtras.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include "llvm/Transforms/Scalar.h"
55 #include "llvm/Transforms/Utils/Local.h"
56 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
57 
58 #ifndef NDEBUG
59 // We only use this for a debug check.
60 #include <random>
61 #endif
62 
63 using namespace llvm;
64 using namespace llvm::sroa;
65 
66 #define DEBUG_TYPE "sroa"
67 
68 STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
69 STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
70 STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
71 STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
72 STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
73 STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
74 STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
75 STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
76 STATISTIC(NumDeleted, "Number of instructions deleted");
77 STATISTIC(NumVectorized, "Number of vectorized aggregates");
78 
79 /// Hidden option to enable randomly shuffling the slices to help uncover
80 /// instability in their order.
81 static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices",
82  cl::init(false), cl::Hidden);
83 
84 /// Hidden option to experiment with completely strict handling of inbounds
85 /// GEPs.
86 static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
87  cl::Hidden);
88 
89 namespace {
90 /// \brief A custom IRBuilder inserter which prefixes all names, but only in
91 /// Assert builds.
92 class IRBuilderPrefixedInserter : public IRBuilderDefaultInserter {
93  std::string Prefix;
94  const Twine getNameWithPrefix(const Twine &Name) const {
95  return Name.isTriviallyEmpty() ? Name : Prefix + Name;
96  }
97 
98 public:
99  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
100 
101 protected:
102  void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
103  BasicBlock::iterator InsertPt) const {
104  IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name), BB,
105  InsertPt);
106  }
107 };
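// Illustrative example (the prefix string is made up): after
// SetNamePrefix("a.sroa.0."), an instruction that would otherwise be named
// "load" is inserted as "a.sroa.0.load"; as noted above, this only happens in
// assert builds, since release builds drop instruction names entirely.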
108 
109 /// \brief Provide a typedef for IRBuilder that drops names in release builds.
110 typedef llvm::IRBuilder<ConstantFolder, IRBuilderPrefixedInserter> IRBuilderTy;
111 }
112 
113 namespace {
114 /// \brief A used slice of an alloca.
115 ///
116 /// This structure represents a slice of an alloca used by some instruction. It
117 /// stores both the begin and end offsets of this use, a pointer to the use
118 /// itself, and a flag indicating whether we can classify the use as splittable
119 /// or not when forming partitions of the alloca.
120 class Slice {
121  /// \brief The beginning offset of the range.
122  uint64_t BeginOffset;
123 
124  /// \brief The ending offset, not included in the range.
125  uint64_t EndOffset;
126 
127  /// \brief Storage for both the use of this slice and whether it can be
128  /// split.
129  PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
130 
131 public:
132  Slice() : BeginOffset(), EndOffset() {}
133  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
134  : BeginOffset(BeginOffset), EndOffset(EndOffset),
135  UseAndIsSplittable(U, IsSplittable) {}
136 
137  uint64_t beginOffset() const { return BeginOffset; }
138  uint64_t endOffset() const { return EndOffset; }
139 
140  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
141  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
142 
143  Use *getUse() const { return UseAndIsSplittable.getPointer(); }
144 
145  bool isDead() const { return getUse() == nullptr; }
146  void kill() { UseAndIsSplittable.setPointer(nullptr); }
147 
148  /// \brief Support for ordering ranges.
149  ///
150  /// This provides an ordering over ranges such that start offsets are
151  /// always increasing, and within equal start offsets, the end offsets are
152  /// decreasing. Thus the spanning range comes first in a cluster with the
153  /// same start position.
154  bool operator<(const Slice &RHS) const {
155  if (beginOffset() < RHS.beginOffset())
156  return true;
157  if (beginOffset() > RHS.beginOffset())
158  return false;
159  if (isSplittable() != RHS.isSplittable())
160  return !isSplittable();
161  if (endOffset() > RHS.endOffset())
162  return true;
163  return false;
164  }
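  // For example, with slices written as {begin, end, splittable}, the slices
  // {0, 8, false}, {0, 16, true} and {4, 8, true} sort as
  //   {0, 8, false} < {0, 16, true} < {4, 8, true}
  // i.e. ascending begin offsets, with unsplittable slices ordered before
  // splittable ones sharing the same begin offset. (Illustrative values only.)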
165 
166  /// \brief Support comparison with a single offset to allow binary searches.
167  friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
168  uint64_t RHSOffset) {
169  return LHS.beginOffset() < RHSOffset;
170  }
171  friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
172  const Slice &RHS) {
173  return LHSOffset < RHS.beginOffset();
174  }
175 
176  bool operator==(const Slice &RHS) const {
177  return isSplittable() == RHS.isSplittable() &&
178  beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
179  }
180  bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
181 };
182 } // end anonymous namespace
183 
184 namespace llvm {
185 template <typename T> struct isPodLike;
186 template <> struct isPodLike<Slice> { static const bool value = true; };
187 }
188 
189 /// \brief Representation of the alloca slices.
190 ///
191 /// This class represents the slices of an alloca which are formed by its
192 /// various uses. If a pointer escapes, we can't fully build a representation
193 /// for the slices used and we reflect that in this structure. The uses are
194 /// stored, sorted by increasing beginning offset and with unsplittable slices
195 /// starting at a particular offset before splittable slices.
196 class llvm::sroa::AllocaSlices {
197 public:
198  /// \brief Construct the slices of a particular alloca.
199  AllocaSlices(const DataLayout &DL, AllocaInst &AI);
200 
201  /// \brief Test whether a pointer to the allocation escapes our analysis.
202  ///
203  /// If this is true, the slices are never fully built and should be
204  /// ignored.
205  bool isEscaped() const { return PointerEscapingInstr; }
206 
207  /// \brief Support for iterating over the slices.
208  /// @{
209  typedef SmallVectorImpl<Slice>::iterator iterator;
210  typedef iterator_range<iterator> range;
211  iterator begin() { return Slices.begin(); }
212  iterator end() { return Slices.end(); }
213 
214  typedef SmallVectorImpl<Slice>::const_iterator const_iterator;
215  typedef iterator_range<const_iterator> const_range;
216  const_iterator begin() const { return Slices.begin(); }
217  const_iterator end() const { return Slices.end(); }
218  /// @}
219 
220  /// \brief Erase a range of slices.
221  void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
222 
223  /// \brief Insert new slices for this alloca.
224  ///
225  /// This moves the slices into the alloca's slices collection, and re-sorts
226  /// everything so that the usual ordering properties of the alloca's slices
227  /// hold.
228  void insert(ArrayRef<Slice> NewSlices) {
229  int OldSize = Slices.size();
230  Slices.append(NewSlices.begin(), NewSlices.end());
231  auto SliceI = Slices.begin() + OldSize;
232  std::sort(SliceI, Slices.end());
233  std::inplace_merge(Slices.begin(), SliceI, Slices.end());
234  }
235 
236  // Forward declare the iterator and range accessor for walking the
237  // partitions.
238  class partition_iterator;
239  iterator_range<partition_iterator> partitions();
240 
241  /// \brief Access the dead users for this alloca.
242  ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
243 
244  /// \brief Access the dead operands referring to this alloca.
245  ///
246  /// These are operands which cannot actually be used to refer to the
247  /// alloca as they are outside its range and the user doesn't correct for
248  /// that. These mostly consist of PHI node inputs and the like which we just
249  /// need to replace with undef.
250  ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
251 
252 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
253  void print(raw_ostream &OS, const_iterator I, StringRef Indent = "  ") const;
254  void printSlice(raw_ostream &OS, const_iterator I,
255                  StringRef Indent = "  ") const;
256  void printUse(raw_ostream &OS, const_iterator I,
257                StringRef Indent = "  ") const;
258  void print(raw_ostream &OS) const;
259  void dump(const_iterator I) const;
260  void dump() const;
261 #endif
262 
263 private:
264  template <typename DerivedT, typename RetT = void> class BuilderBase;
265  class SliceBuilder;
266  friend class AllocaSlices::SliceBuilder;
267 
268 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
269  /// \brief Handle to alloca instruction to simplify method interfaces.
270  AllocaInst &AI;
271 #endif
272 
273  /// \brief The instruction responsible for this alloca not having a known set
274  /// of slices.
275  ///
276  /// When an instruction (potentially) escapes the pointer to the alloca, we
277  /// store a pointer to that here and abort trying to form slices of the
278  /// alloca. This will be null if the alloca slices are analyzed successfully.
279  Instruction *PointerEscapingInstr;
280 
281  /// \brief The slices of the alloca.
282  ///
283  /// We store a vector of the slices formed by uses of the alloca here. This
284  /// vector is sorted by increasing begin offset, and then the unsplittable
285  /// slices before the splittable ones. See the Slice inner class for more
286  /// details.
287  SmallVector<Slice, 8> Slices;
288 
289  /// \brief Instructions which will become dead if we rewrite the alloca.
290  ///
291  /// Note that these are not separated by slice. This is because we expect an
292  /// alloca to be completely rewritten or not rewritten at all. If rewritten,
293  /// all these instructions can simply be removed and replaced with undef as
294  /// they come from outside of the allocated space.
295  SmallVector<Instruction *, 8> DeadUsers;
296 
297  /// \brief Operands which will become dead if we rewrite the alloca.
298  ///
299  /// These are operands that in their particular use can be replaced with
300  /// undef when we rewrite the alloca. These show up in out-of-bounds inputs
301  /// to PHI nodes and the like. They aren't entirely dead (there might be
302  /// a GEP back into the bounds using it elsewhere) and neither is the PHI, but we
303  /// want to swap this particular input for undef to simplify the use lists of
304  /// the alloca.
305  SmallVector<Use *, 8> DeadOperands;
306 };
307 
308 /// \brief A partition of the slices.
309 ///
310 /// An ephemeral representation for a range of slices which can be viewed as
311 /// a partition of the alloca. This range represents a span of the alloca's
312 /// memory which cannot be split, and provides access to all of the slices
313 /// overlapping some part of the partition.
314 ///
315 /// Objects of this type are produced by traversing the alloca's slices, but
316 /// are only ephemeral and not persistent.
317 class llvm::sroa::Partition {
318 private:
319  friend class AllocaSlices;
320  friend class AllocaSlices::partition_iterator;
321 
322  typedef AllocaSlices::iterator iterator;
323 
324  /// \brief The beginning and ending offsets of the alloca for this
325  /// partition.
326  uint64_t BeginOffset, EndOffset;
327 
328  /// \brief The start and end iterators of this partition.
329  iterator SI, SJ;
330 
331  /// \brief A collection of split slice tails overlapping the partition.
332  SmallVector<Slice *, 4> SplitTails;
333 
334  /// \brief Raw constructor builds an empty partition starting and ending at
335  /// the given iterator.
336  Partition(iterator SI) : SI(SI), SJ(SI) {}
337 
338 public:
339  /// \brief The start offset of this partition.
340  ///
341  /// All of the contained slices start at or after this offset.
342  uint64_t beginOffset() const { return BeginOffset; }
343 
344  /// \brief The end offset of this partition.
345  ///
346  /// All of the contained slices end at or before this offset.
347  uint64_t endOffset() const { return EndOffset; }
348 
349  /// \brief The size of the partition.
350  ///
351  /// Note that this can never be zero.
352  uint64_t size() const {
353  assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
354  return EndOffset - BeginOffset;
355  }
356 
357  /// \brief Test whether this partition contains no slices, and merely spans
358  /// a region occupied by split slices.
359  bool empty() const { return SI == SJ; }
360 
361  /// \name Iterate slices that start within the partition.
362  /// These may be splittable or unsplittable. They have a begin offset >= the
363  /// partition begin offset.
364  /// @{
365  // FIXME: We should probably define a "concat_iterator" helper and use that
366  // to stitch together pointee_iterators over the split tails and the
367  // contiguous iterators of the partition. That would give a much nicer
368  // interface here. We could then additionally expose filtered iterators for
369  // split, unsplit, and unsplittable slices based on the usage patterns.
370  iterator begin() const { return SI; }
371  iterator end() const { return SJ; }
372  /// @}
373 
374  /// \brief Get the sequence of split slice tails.
375  ///
376  /// These tails are of slices which start before this partition but are
377  /// split and overlap into the partition. We accumulate these while forming
378  /// partitions.
379  ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
380 };
381 
382 /// \brief An iterator over partitions of the alloca's slices.
383 ///
384 /// This iterator implements the core algorithm for partitioning the alloca's
385 /// slices. It is a forward iterator as we don't support backtracking for
386 /// efficiency reasons, and re-use a single storage area to maintain the
387 /// current set of split slices.
388 ///
389 /// It is templated on the slice iterator type to use so that it can operate
390 /// with either const or non-const slice iterators.
391 class AllocaSlices::partition_iterator
392     : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
393  Partition> {
394  friend class AllocaSlices;
395 
396  /// \brief Most of the state for walking the partitions is held in a class
397  /// with a nice interface for examining them.
398  Partition P;
399 
400  /// \brief We need to keep the end of the slices to know when to stop.
401  AllocaSlices::iterator SE;
402 
403  /// \brief We also need to keep track of the maximum split end offset seen.
404  /// FIXME: Do we really?
405  uint64_t MaxSplitSliceEndOffset;
406 
407  /// \brief Sets the partition to be empty at given iterator, and sets the
408  /// end iterator.
409  partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
410      : P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
411  // If not already at the end, advance our state to form the initial
412  // partition.
413  if (SI != SE)
414  advance();
415  }
416 
417  /// \brief Advance the iterator to the next partition.
418  ///
419  /// Requires that the iterator not be at the end of the slices.
420  void advance() {
421  assert((P.SI != SE || !P.SplitTails.empty()) &&
422  "Cannot advance past the end of the slices!");
423 
424  // Clear out any split uses which have ended.
425  if (!P.SplitTails.empty()) {
426  if (P.EndOffset >= MaxSplitSliceEndOffset) {
427  // If we've finished all splits, this is easy.
428  P.SplitTails.clear();
429  MaxSplitSliceEndOffset = 0;
430  } else {
431  // Remove the uses which have ended in the prior partition. This
432  // cannot change the max split slice end because we just checked that
433  // the prior partition ended prior to that max.
434  P.SplitTails.erase(
435  remove_if(P.SplitTails,
436  [&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
437  P.SplitTails.end());
438  assert(any_of(P.SplitTails,
439  [&](Slice *S) {
440  return S->endOffset() == MaxSplitSliceEndOffset;
441  }) &&
442  "Could not find the current max split slice offset!");
443  assert(all_of(P.SplitTails,
444  [&](Slice *S) {
445  return S->endOffset() <= MaxSplitSliceEndOffset;
446  }) &&
447  "Max split slice end offset is not actually the max!");
448  }
449  }
450 
451  // If P.SI is already at the end, then we've cleared the split tail and
452  // now have an end iterator.
453  if (P.SI == SE) {
454  assert(P.SplitTails.empty() && "Failed to clear the split slices!");
455  return;
456  }
457 
458  // If we had a non-empty partition previously, set up the state for
459  // subsequent partitions.
460  if (P.SI != P.SJ) {
461  // Accumulate all the splittable slices which started in the old
462  // partition into the split list.
463  for (Slice &S : P)
464  if (S.isSplittable() && S.endOffset() > P.EndOffset) {
465  P.SplitTails.push_back(&S);
466  MaxSplitSliceEndOffset =
467  std::max(S.endOffset(), MaxSplitSliceEndOffset);
468  }
469 
470  // Start from the end of the previous partition.
471  P.SI = P.SJ;
472 
473  // If P.SI is now at the end, we at most have a tail of split slices.
474  if (P.SI == SE) {
475  P.BeginOffset = P.EndOffset;
476  P.EndOffset = MaxSplitSliceEndOffset;
477  return;
478  }
479 
480  // If we have split slices and the next slice is after a gap and is
481  // not splittable, immediately form an empty partition for the split
482  // slices up until the next slice begins.
483  if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
484  !P.SI->isSplittable()) {
485  P.BeginOffset = P.EndOffset;
486  P.EndOffset = P.SI->beginOffset();
487  return;
488  }
489  }
490 
491  // OK, we need to consume new slices. Set the end offset based on the
492  // current slice, and step SJ past it. The beginning offset of the
493  // partition is the beginning offset of the next slice unless we have
494  // pre-existing split slices that are continuing, in which case we begin
495  // at the prior end offset.
496  P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
497  P.EndOffset = P.SI->endOffset();
498  ++P.SJ;
499 
500  // There are two strategies to form a partition based on whether the
501  // partition starts with an unsplittable slice or a splittable slice.
502  if (!P.SI->isSplittable()) {
503  // When we're forming an unsplittable region, it must always start at
504  // the first slice and will extend through its end.
505  assert(P.BeginOffset == P.SI->beginOffset());
506 
507  // Form a partition including all of the overlapping slices with this
508  // unsplittable slice.
509  while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
510  if (!P.SJ->isSplittable())
511  P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
512  ++P.SJ;
513  }
514 
515  // We have a partition across a set of overlapping unsplittable
516  // slices.
517  return;
518  }
519 
520  // If we're starting with a splittable slice, then we need to form
521  // a synthetic partition spanning it and any other overlapping splittable
522  // slices.
523  assert(P.SI->isSplittable() && "Forming a splittable partition!");
524 
525  // Collect all of the overlapping splittable slices.
526  while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
527  P.SJ->isSplittable()) {
528  P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
529  ++P.SJ;
530  }
531 
532  // Back up P.EndOffset if we ended the span early when encountering an
533  // unsplittable slice. This synthesizes the early end offset of
534  // a partition spanning only splittable slices.
535  if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
536  assert(!P.SJ->isSplittable());
537  P.EndOffset = P.SJ->beginOffset();
538  }
539  }
540 
541 public:
542  bool operator==(const partition_iterator &RHS) const {
543  assert(SE == RHS.SE &&
544  "End iterators don't match between compared partition iterators!");
545 
546  // The observed position of a partition is marked by the P.SI iterator and
547  // the emptiness of the split slices. The latter is only relevant when
548  // P.SI == SE, as the end iterator will additionally have an empty split
549  // slices list, but the prior may have the same P.SI and a tail of split
550  // slices.
551  if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
552  assert(P.SJ == RHS.P.SJ &&
553  "Same set of slices formed two different sized partitions!");
554  assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
555  "Same slice position with differently sized non-empty split "
556  "slice tails!");
557  return true;
558  }
559  return false;
560  }
561 
562  partition_iterator &operator++() {
563    advance();
564  return *this;
565  }
566 
567  Partition &operator*() { return P; }
568 };
569 
570 /// \brief A forward range over the partitions of the alloca's slices.
571 ///
572 /// This accesses an iterator range over the partitions of the alloca's
573 /// slices. It computes these partitions on the fly based on the overlapping
574 /// offsets of the slices and the ability to split them. It will visit "empty"
575 /// partitions to cover regions of the alloca only accessed via split
576 /// slices.
577 iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
578  return make_range(partition_iterator(begin(), end()),
579  partition_iterator(end(), end()));
580 }
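// As an illustrative example: for an alloca covered by an unsplittable slice
// over [0,8) and a splittable slice over [0,16), this range yields two
// partitions: [0,8), containing both slices, followed by [8,16), which holds
// no slices of its own and is covered only by the split tail of the [0,16)
// slice.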
581 
582 static Value *foldSelectInst(SelectInst &SI) {
583  // If the condition being selected on is a constant or the same value is
584  // being selected between, fold the select. Yes this does (rarely) happen
585  // early on.
586  if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
587  return SI.getOperand(1 + CI->isZero());
588  if (SI.getOperand(1) == SI.getOperand(2))
589  return SI.getOperand(1);
590 
591  return nullptr;
592 }
593 
594 /// \brief A helper that folds a PHI node or a select.
595 static Value *foldPHINodeOrSelectInst(Instruction &I) {
596  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
597  // If PN merges together the same value, return that value.
598  return PN->hasConstantValue();
599  }
600  return foldSelectInst(cast<SelectInst>(I));
601 }
602 
603 /// \brief Builder for the alloca slices.
604 ///
605 /// This class builds a set of alloca slices by recursively visiting the uses
606 /// of an alloca and making a slice for each load and store at each offset.
607 class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
608  friend class PtrUseVisitor<SliceBuilder>;
609  friend class InstVisitor<SliceBuilder>;
610  typedef PtrUseVisitor<SliceBuilder> Base;
611 
612  const uint64_t AllocSize;
613  AllocaSlices &AS;
614 
615  SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
616  SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
617 
618  /// \brief Set to de-duplicate dead instructions found in the use walk.
619  SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
620 
621 public:
622  SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
623      : PtrUseVisitor<SliceBuilder>(DL),
624        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), AS(AS) {}
625 
626 private:
627  void markAsDead(Instruction &I) {
628  if (VisitedDeadInsts.insert(&I).second)
629  AS.DeadUsers.push_back(&I);
630  }
631 
632  void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
633  bool IsSplittable = false) {
634  // Completely skip uses which have a zero size or start either before or
635  // past the end of the allocation.
636  if (Size == 0 || Offset.uge(AllocSize)) {
637  DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
638  << " which has zero size or starts outside of the "
639  << AllocSize << " byte alloca:\n"
640  << " alloca: " << AS.AI << "\n"
641  << " use: " << I << "\n");
642  return markAsDead(I);
643  }
644 
645  uint64_t BeginOffset = Offset.getZExtValue();
646  uint64_t EndOffset = BeginOffset + Size;
647 
648  // Clamp the end offset to the end of the allocation. Note that this is
649  // formulated to handle even the case where "BeginOffset + Size" overflows.
650  // This may appear superficially to be something we could ignore entirely,
651  // but that is not so! There may be widened loads or PHI-node uses where
652  // some instructions are dead but not others. We can't completely ignore
653  // them, and so have to record at least the information here.
654  assert(AllocSize >= BeginOffset); // Established above.
655  if (Size > AllocSize - BeginOffset) {
656  DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
657  << " to remain within the " << AllocSize << " byte alloca:\n"
658  << " alloca: " << AS.AI << "\n"
659  << " use: " << I << "\n");
660  EndOffset = AllocSize;
661  }
662 
663  AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
664  }
665 
666  void visitBitCastInst(BitCastInst &BC) {
667  if (BC.use_empty())
668  return markAsDead(BC);
669 
670  return Base::visitBitCastInst(BC);
671  }
672 
673  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
674  if (GEPI.use_empty())
675  return markAsDead(GEPI);
676 
677  if (SROAStrictInbounds && GEPI.isInBounds()) {
678  // FIXME: This is a manually un-factored variant of the basic code inside
679  // of GEPs with checking of the inbounds invariant specified in the
680  // langref in a very strict sense. If we ever want to enable
681  // SROAStrictInbounds, this code should be factored cleanly into
682  // PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
683  // by writing out the code here where we have the underlying allocation
684  // size readily available.
685  APInt GEPOffset = Offset;
686  const DataLayout &DL = GEPI.getModule()->getDataLayout();
687  for (gep_type_iterator GTI = gep_type_begin(GEPI),
688  GTE = gep_type_end(GEPI);
689  GTI != GTE; ++GTI) {
690  ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
691  if (!OpC)
692  break;
693 
694  // Handle a struct index, which adds its field offset to the pointer.
695  if (StructType *STy = GTI.getStructTypeOrNull()) {
696  unsigned ElementIdx = OpC->getZExtValue();
697  const StructLayout *SL = DL.getStructLayout(STy);
698  GEPOffset +=
699  APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
700  } else {
701  // For array or vector indices, scale the index by the size of the
702  // type.
703  APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
704  GEPOffset += Index * APInt(Offset.getBitWidth(),
705  DL.getTypeAllocSize(GTI.getIndexedType()));
706  }
707 
708  // If this index has computed an intermediate pointer which is not
709  // inbounds, then the result of the GEP is a poison value and we can
710  // delete it and all uses.
711  if (GEPOffset.ugt(AllocSize))
712  return markAsDead(GEPI);
713  }
714  }
715 
716  return Base::visitGetElementPtrInst(GEPI);
717  }
718 
719  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
720  uint64_t Size, bool IsVolatile) {
721  // We allow splitting of non-volatile loads and stores where the type is an
722  // integer type. These may be used to implement 'memcpy' or other "transfer
723  // of bits" patterns.
724  bool IsSplittable = Ty->isIntegerTy() && !IsVolatile;
725 
726  insertUse(I, Offset, Size, IsSplittable);
727  }
728 
729  void visitLoadInst(LoadInst &LI) {
730  assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
731  "All simple FCA loads should have been pre-split");
732 
733  if (!IsOffsetKnown)
734  return PI.setAborted(&LI);
735 
736  const DataLayout &DL = LI.getModule()->getDataLayout();
737  uint64_t Size = DL.getTypeStoreSize(LI.getType());
738  return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
739  }
740 
741  void visitStoreInst(StoreInst &SI) {
742  Value *ValOp = SI.getValueOperand();
743  if (ValOp == *U)
744  return PI.setEscapedAndAborted(&SI);
745  if (!IsOffsetKnown)
746  return PI.setAborted(&SI);
747 
748  const DataLayout &DL = SI.getModule()->getDataLayout();
749  uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
750 
751  // If this memory access can be shown to *statically* extend outside the
752  // bounds of the allocation, its behavior is undefined, so simply
753  // ignore it. Note that this is more strict than the generic clamping
754  // behavior of insertUse. We also try to handle cases which might run the
755  // risk of overflow.
756  // FIXME: We should instead consider the pointer to have escaped if this
757  // function is being instrumented for addressing bugs or race conditions.
758  if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
759  DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
760  << " which extends past the end of the " << AllocSize
761  << " byte alloca:\n"
762  << " alloca: " << AS.AI << "\n"
763  << " use: " << SI << "\n");
764  return markAsDead(SI);
765  }
766 
767  assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
768  "All simple FCA stores should have been pre-split");
769  handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
770  }
771 
772  void visitMemSetInst(MemSetInst &II) {
773  assert(II.getRawDest() == *U && "Pointer use is not the destination?");
774  ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
775  if ((Length && Length->getValue() == 0) ||
776  (IsOffsetKnown && Offset.uge(AllocSize)))
777  // Zero-length mem transfer intrinsics can be ignored entirely.
778  return markAsDead(II);
779 
780  if (!IsOffsetKnown)
781  return PI.setAborted(&II);
782 
783  insertUse(II, Offset, Length ? Length->getLimitedValue()
784  : AllocSize - Offset.getLimitedValue(),
785  (bool)Length);
786  }
787 
788  void visitMemTransferInst(MemTransferInst &II) {
789  ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
790  if (Length && Length->getValue() == 0)
791  // Zero-length mem transfer intrinsics can be ignored entirely.
792  return markAsDead(II);
793 
794  // Because we can visit these intrinsics twice, also check whether the
795  // first visit marked this instruction as dead. If so, skip it.
796  if (VisitedDeadInsts.count(&II))
797  return;
798 
799  if (!IsOffsetKnown)
800  return PI.setAborted(&II);
801 
802  // This side of the transfer is completely out-of-bounds, and so we can
803  // nuke the entire transfer. However, we also need to nuke the other side
804  // if already added to our partitions.
805  // FIXME: Yet another place we really should bypass this when
806  // instrumenting for ASan.
807  if (Offset.uge(AllocSize)) {
808      SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
809          MemTransferSliceMap.find(&II);
810  if (MTPI != MemTransferSliceMap.end())
811  AS.Slices[MTPI->second].kill();
812  return markAsDead(II);
813  }
814 
815  uint64_t RawOffset = Offset.getLimitedValue();
816  uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
817 
818  // Check for the special case where the same exact value is used for both
819  // source and dest.
820  if (*U == II.getRawDest() && *U == II.getRawSource()) {
821  // For non-volatile transfers this is a no-op.
822  if (!II.isVolatile())
823  return markAsDead(II);
824 
825  return insertUse(II, Offset, Size, /*IsSplittable=*/false);
826  }
827 
828  // If we have seen both source and destination for a mem transfer, then
829  // they both point to the same alloca.
830  bool Inserted;
831    SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
832    std::tie(MTPI, Inserted) =
833  MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
834  unsigned PrevIdx = MTPI->second;
835  if (!Inserted) {
836  Slice &PrevP = AS.Slices[PrevIdx];
837 
838  // Check if the begin offsets match and this is a non-volatile transfer.
839  // In that case, we can completely elide the transfer.
840  if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
841  PrevP.kill();
842  return markAsDead(II);
843  }
844 
845  // Otherwise we have an offset transfer within the same alloca. We can't
846  // split those.
847  PrevP.makeUnsplittable();
848  }
849 
850  // Insert the use now that we've fixed up the splittable nature.
851  insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
852 
853  // Check that we ended up with a valid index in the map.
854  assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
855  "Map index doesn't point back to a slice with this user.");
856  }
857 
858  // Disable SRoA for any intrinsics except for lifetime invariants.
859  // FIXME: What about debug intrinsics? This matches old behavior, but
860  // doesn't make sense.
861  void visitIntrinsicInst(IntrinsicInst &II) {
862  if (!IsOffsetKnown)
863  return PI.setAborted(&II);
864 
865  if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
866  II.getIntrinsicID() == Intrinsic::lifetime_end) {
867  ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
868  uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
869  Length->getLimitedValue());
870  insertUse(II, Offset, Size, true);
871  return;
872  }
873 
874  Base::visitIntrinsicInst(II);
875  }
876 
877  Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
878  // We consider any PHI or select that results in a direct load or store of
879  // the same offset to be a viable use for slicing purposes. These uses
880  // are considered unsplittable and the size is the maximum loaded or stored
881  // size.
882    SmallPtrSet<Instruction *, 4> Visited;
883    SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
884    Visited.insert(Root);
885  Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
886  const DataLayout &DL = Root->getModule()->getDataLayout();
887  // If there are no loads or stores, the access is dead. We mark that as
888  // a size zero access.
889  Size = 0;
890  do {
891  Instruction *I, *UsedI;
892  std::tie(UsedI, I) = Uses.pop_back_val();
893 
894  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
895  Size = std::max(Size, DL.getTypeStoreSize(LI->getType()));
896  continue;
897  }
898  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
899  Value *Op = SI->getOperand(0);
900  if (Op == UsedI)
901  return SI;
902  Size = std::max(Size, DL.getTypeStoreSize(Op->getType()));
903  continue;
904  }
905 
906  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
907  if (!GEP->hasAllZeroIndices())
908  return GEP;
909  } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
910  !isa<SelectInst>(I)) {
911  return I;
912  }
913 
914  for (User *U : I->users())
915  if (Visited.insert(cast<Instruction>(U)).second)
916  Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
917  } while (!Uses.empty());
918 
919  return nullptr;
920  }
921 
922  void visitPHINodeOrSelectInst(Instruction &I) {
923  assert(isa<PHINode>(I) || isa<SelectInst>(I));
924  if (I.use_empty())
925  return markAsDead(I);
926 
927  // TODO: We could use SimplifyInstruction here to fold PHINodes and
928  // SelectInsts. However, doing so requires changing the current
929  // dead-operand-tracking mechanism. For instance, suppose neither loading
930  // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
931  // trap either. However, if we simply replace %U with undef using the
932  // current dead-operand-tracking mechanism, "load (select undef, undef,
933  // %other)" may trap because the select may return the first operand
934  // "undef".
935  if (Value *Result = foldPHINodeOrSelectInst(I)) {
936  if (Result == *U)
937  // If the result of the constant fold will be the pointer, recurse
938  // through the PHI/select as if we had RAUW'ed it.
939  enqueueUsers(I);
940  else
941  // Otherwise the operand to the PHI/select is dead, and we can replace
942  // it with undef.
943  AS.DeadOperands.push_back(U);
944 
945  return;
946  }
947 
948  if (!IsOffsetKnown)
949  return PI.setAborted(&I);
950 
951  // See if we already have computed info on this node.
952  uint64_t &Size = PHIOrSelectSizes[&I];
953  if (!Size) {
954  // This is a new PHI/Select, check for an unsafe use of it.
955  if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
956  return PI.setAborted(UnsafeI);
957  }
958 
959  // For PHI and select operands outside the alloca, we can't nuke the entire
960  // phi or select -- the other side might still be relevant, so we special
961  // case them here and use a separate structure to track the operands
962  // themselves which should be replaced with undef.
963  // FIXME: This should instead be escaped in the event we're instrumenting
964  // for address sanitization.
965  if (Offset.uge(AllocSize)) {
966  AS.DeadOperands.push_back(U);
967  return;
968  }
969 
970  insertUse(I, Offset, Size);
971  }
972 
973  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
974 
975  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
976 
977  /// \brief Disable SROA entirely if there are unhandled users of the alloca.
978  void visitInstruction(Instruction &I) { PI.setAborted(&I); }
979 };
980 
981 AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
982     :
983 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
984  AI(AI),
985 #endif
986  PointerEscapingInstr(nullptr) {
987  SliceBuilder PB(DL, AI, *this);
988  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
989  if (PtrI.isEscaped() || PtrI.isAborted()) {
990  // FIXME: We should sink the escape vs. abort info into the caller nicely,
991  // possibly by just storing the PtrInfo in the AllocaSlices.
992  PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
993  : PtrI.getAbortingInst();
994  assert(PointerEscapingInstr && "Did not track a bad instruction");
995  return;
996  }
997 
998  Slices.erase(remove_if(Slices, [](const Slice &S) { return S.isDead(); }),
999  Slices.end());
1000 
1001 #ifndef NDEBUG
1002  if (SROARandomShuffleSlices) {
1003    std::mt19937 MT(static_cast<unsigned>(
1004  std::chrono::system_clock::now().time_since_epoch().count()));
1005  std::shuffle(Slices.begin(), Slices.end(), MT);
1006  }
1007 #endif
1008 
1009  // Sort the uses. This arranges for the offsets to be in ascending order,
1010  // and the sizes to be in descending order.
1011  std::sort(Slices.begin(), Slices.end());
1012 }
1013 
1014 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1015 
1016 void AllocaSlices::print(raw_ostream &OS, const_iterator I,
1017  StringRef Indent) const {
1018  printSlice(OS, I, Indent);
1019  OS << "\n";
1020  printUse(OS, I, Indent);
1021 }
1022 
1023 void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
1024  StringRef Indent) const {
1025  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
1026  << " slice #" << (I - begin())
1027  << (I->isSplittable() ? " (splittable)" : "");
1028 }
1029 
1030 void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
1031  StringRef Indent) const {
1032  OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
1033 }
1034 
1035 void AllocaSlices::print(raw_ostream &OS) const {
1036  if (PointerEscapingInstr) {
1037  OS << "Can't analyze slices for alloca: " << AI << "\n"
1038  << " A pointer to this alloca escaped by:\n"
1039  << " " << *PointerEscapingInstr << "\n";
1040  return;
1041  }
1042 
1043  OS << "Slices of alloca: " << AI << "\n";
1044  for (const_iterator I = begin(), E = end(); I != E; ++I)
1045  print(OS, I);
1046 }
1047 
1048 LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
1049  print(dbgs(), I);
1050 }
1051 LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1052 
1053 #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1054 
1055 /// Walk the range of a partitioning looking for a common type to cover this
1056 /// sequence of slices.
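///
/// For example (illustrative), if one slice loads the whole partition as an
/// i32 while another stores it as a float, there is no single common type and
/// we fall back to the widest integer type used to access the partition (here
/// i32), if any such integer type was seen.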
1057 static Type *findCommonType(AllocaSlices::const_iterator B,
1058  AllocaSlices::const_iterator E,
1059  uint64_t EndOffset) {
1060  Type *Ty = nullptr;
1061  bool TyIsCommon = true;
1062  IntegerType *ITy = nullptr;
1063 
1064  // Note that we need to look at *every* alloca slice's Use to ensure we
1065  // always get consistent results regardless of the order of slices.
1066  for (AllocaSlices::const_iterator I = B; I != E; ++I) {
1067  Use *U = I->getUse();
1068  if (isa<IntrinsicInst>(*U->getUser()))
1069  continue;
1070  if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1071  continue;
1072 
1073  Type *UserTy = nullptr;
1074  if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1075  UserTy = LI->getType();
1076  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1077  UserTy = SI->getValueOperand()->getType();
1078  }
1079 
1080  if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1081  // If the type is larger than the partition, skip it. We only encounter
1082  // this for split integer operations where we want to use the type of the
1083  // entity causing the split. Also skip if the type is not a byte width
1084  // multiple.
1085  if (UserITy->getBitWidth() % 8 != 0 ||
1086  UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1087  continue;
1088 
1089  // Track the largest bitwidth integer type used in this way in case there
1090  // is no common type.
1091  if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
1092  ITy = UserITy;
1093  }
1094 
1095  // To avoid depending on the order of slices, Ty and TyIsCommon must not
1096  // depend on types skipped above.
1097  if (!UserTy || (Ty && Ty != UserTy))
1098  TyIsCommon = false; // Give up on anything but an iN type.
1099  else
1100  Ty = UserTy;
1101  }
1102 
1103  return TyIsCommon ? Ty : ITy;
1104 }
1105 
1106 /// PHI instructions that use an alloca and are subsequently loaded can be
1107 /// rewritten to load both input pointers in the pred blocks and then PHI the
1108 /// results, allowing the load of the alloca to be promoted.
1109 /// From this:
1110 /// %P2 = phi [i32* %Alloca, i32* %Other]
1111 /// %V = load i32* %P2
1112 /// to:
1113 /// %V1 = load i32* %Alloca -> will be mem2reg'd
1114 /// ...
1115 /// %V2 = load i32* %Other
1116 /// ...
1117 /// %V = phi [i32 %V1, i32 %V2]
1118 ///
1119 /// We can do this to a select if its only uses are loads and if the operands
1120 /// to the select can be loaded unconditionally.
1121 ///
1122 /// FIXME: This should be hoisted into a generic utility, likely in
1123 /// Transforms/Util/Local.h
1124 static bool isSafePHIToSpeculate(PHINode &PN) {
1125  // For now, we can only do this promotion if the load is in the same block
1126  // as the PHI, and if there are no stores between the phi and load.
1127  // TODO: Allow recursive phi users.
1128  // TODO: Allow stores.
1129  BasicBlock *BB = PN.getParent();
1130  unsigned MaxAlign = 0;
1131  bool HaveLoad = false;
1132  for (User *U : PN.users()) {
1133  LoadInst *LI = dyn_cast<LoadInst>(U);
1134  if (!LI || !LI->isSimple())
1135  return false;
1136 
1137  // For now we only allow loads in the same block as the PHI. This is
1138  // a common case that happens when instcombine merges two loads through
1139  // a PHI.
1140  if (LI->getParent() != BB)
1141  return false;
1142 
1143  // Ensure that there are no instructions between the PHI and the load that
1144  // could store.
1145  for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
1146  if (BBI->mayWriteToMemory())
1147  return false;
1148 
1149  MaxAlign = std::max(MaxAlign, LI->getAlignment());
1150  HaveLoad = true;
1151  }
1152 
1153  if (!HaveLoad)
1154  return false;
1155 
1156  const DataLayout &DL = PN.getModule()->getDataLayout();
1157 
1158  // We can only transform this if it is safe to push the loads into the
1159  // predecessor blocks. The only thing to watch out for is that we can't put
1160  // a possibly trapping load in the predecessor if it is a critical edge.
1161  for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1162    TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
1163    Value *InVal = PN.getIncomingValue(Idx);
1164 
1165  // If the value is produced by the terminator of the predecessor (an
1166  // invoke) or it has side-effects, there is no valid place to put a load
1167  // in the predecessor.
1168  if (TI == InVal || TI->mayHaveSideEffects())
1169  return false;
1170 
1171  // If the predecessor has a single successor, then the edge isn't
1172  // critical.
1173  if (TI->getNumSuccessors() == 1)
1174  continue;
1175 
1176  // If this pointer is always safe to load, or if we can prove that there
1177  // is already a load in the block, then we can move the load to the pred
1178  // block.
1179  if (isSafeToLoadUnconditionally(InVal, MaxAlign, DL, TI))
1180  continue;
1181 
1182  return false;
1183  }
1184 
1185  return true;
1186 }
1187 
1188 static void speculatePHINodeLoads(PHINode &PN) {
1189  DEBUG(dbgs() << " original: " << PN << "\n");
1190 
1191  Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
1192  IRBuilderTy PHIBuilder(&PN);
1193  PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
1194  PN.getName() + ".sroa.speculated");
1195 
1196  // Get the AA tags and alignment to use from one of the loads. It doesn't
1197  // matter which one we get or whether any of them differ.
1198  LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
1199 
1200  AAMDNodes AATags;
1201  SomeLoad->getAAMetadata(AATags);
1202  unsigned Align = SomeLoad->getAlignment();
1203 
1204  // Rewrite all loads of the PN to use the new PHI.
1205  while (!PN.use_empty()) {
1206  LoadInst *LI = cast<LoadInst>(PN.user_back());
1207  LI->replaceAllUsesWith(NewPN);
1208  LI->eraseFromParent();
1209  }
1210 
1211  // Inject loads into all of the pred blocks.
1212  for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1213  BasicBlock *Pred = PN.getIncomingBlock(Idx);
1214  TerminatorInst *TI = Pred->getTerminator();
1215  Value *InVal = PN.getIncomingValue(Idx);
1216  IRBuilderTy PredBuilder(TI);
1217 
1218  LoadInst *Load = PredBuilder.CreateLoad(
1219  InVal, (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
1220  ++NumLoadsSpeculated;
1221  Load->setAlignment(Align);
1222  if (AATags)
1223  Load->setAAMetadata(AATags);
1224  NewPN->addIncoming(Load, Pred);
1225  }
1226 
1227  DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
1228  PN.eraseFromParent();
1229 }
1230 
1231 /// Select instructions that use an alloca and are subsequently loaded can be
1232 /// rewritten to load both input pointers and then select between the result,
1233 /// allowing the load of the alloca to be promoted.
1234 /// From this:
1235 /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
1236 /// %V = load i32* %P2
1237 /// to:
1238 /// %V1 = load i32* %Alloca -> will be mem2reg'd
1239 /// %V2 = load i32* %Other
1240 /// %V = select i1 %cond, i32 %V1, i32 %V2
1241 ///
1242 /// We can do this to a select if its only uses are loads and if the operand
1243 /// to the select can be loaded unconditionally.
1244 static bool isSafeSelectToSpeculate(SelectInst &SI) {
1245  Value *TValue = SI.getTrueValue();
1246  Value *FValue = SI.getFalseValue();
1247  const DataLayout &DL = SI.getModule()->getDataLayout();
1248 
1249  for (User *U : SI.users()) {
1250  LoadInst *LI = dyn_cast<LoadInst>(U);
1251  if (!LI || !LI->isSimple())
1252  return false;
1253 
1254  // Both operands to the select need to be dereferenceable, either
1255  // absolutely (e.g. allocas) or at this point because we can see other
1256  // accesses to it.
1257  if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), DL, LI))
1258  return false;
1259  if (!isSafeToLoadUnconditionally(FValue, LI->getAlignment(), DL, LI))
1260  return false;
1261  }
1262 
1263  return true;
1264 }
1265 
1266 static void speculateSelectInstLoads(SelectInst &SI) {
1267  DEBUG(dbgs() << " original: " << SI << "\n");
1268 
1269  IRBuilderTy IRB(&SI);
1270  Value *TV = SI.getTrueValue();
1271  Value *FV = SI.getFalseValue();
1272  // Replace the loads of the select with a select of two loads.
1273  while (!SI.use_empty()) {
1274  LoadInst *LI = cast<LoadInst>(SI.user_back());
1275  assert(LI->isSimple() && "We only speculate simple loads");
1276 
1277  IRB.SetInsertPoint(LI);
1278  LoadInst *TL =
1279  IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
1280  LoadInst *FL =
1281  IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
1282  NumLoadsSpeculated += 2;
1283 
1284  // Transfer alignment and AA info if present.
1285  TL->setAlignment(LI->getAlignment());
1286  FL->setAlignment(LI->getAlignment());
1287 
1288  AAMDNodes Tags;
1289  LI->getAAMetadata(Tags);
1290  if (Tags) {
1291  TL->setAAMetadata(Tags);
1292  FL->setAAMetadata(Tags);
1293  }
1294 
1295  Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1296  LI->getName() + ".sroa.speculated");
1297 
1298  DEBUG(dbgs() << " speculated to: " << *V << "\n");
1299  LI->replaceAllUsesWith(V);
1300  LI->eraseFromParent();
1301  }
1302  SI.eraseFromParent();
1303 }
1304 
1305 /// \brief Build a GEP out of a base pointer and indices.
1306 ///
1307 /// This will return the BasePtr if that is valid, or build a new GEP
1308 /// instruction using the IRBuilder if GEP-ing is needed.
1309 static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
1310  SmallVectorImpl<Value *> &Indices, Twine NamePrefix) {
1311  if (Indices.empty())
1312  return BasePtr;
1313 
1314  // A single zero index is a no-op, so check for this and avoid building a GEP
1315  // in that case.
1316  if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
1317  return BasePtr;
1318 
1319  return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices,
1320  NamePrefix + "sroa_idx");
1321 }
1322 
1323 /// \brief Get a natural GEP off of the BasePtr walking through Ty toward
1324 /// TargetTy without changing the offset of the pointer.
1325 ///
1326 /// This routine assumes we've already established a properly offset GEP with
1327 /// Indices, and arrived at the Ty type. The goal is to continue to GEP with
1328 /// zero-indices down through type layers until we find one the same as
1329 /// TargetTy. If we can't find one with the same type, we at least try to use
1330 /// one with the same size. If none of that works, we just produce the GEP as
1331 /// indicated by Indices to have the correct offset.
1332 static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
1333  Value *BasePtr, Type *Ty, Type *TargetTy,
1334  SmallVectorImpl<Value *> &Indices,
1335  Twine NamePrefix) {
1336  if (Ty == TargetTy)
1337  return buildGEP(IRB, BasePtr, Indices, NamePrefix);
1338 
1339  // Pointer size to use for the indices.
1340  unsigned PtrSize = DL.getPointerTypeSizeInBits(BasePtr->getType());
1341 
1342  // See if we can descend into a struct and locate a field with the correct
1343  // type.
1344  unsigned NumLayers = 0;
1345  Type *ElementTy = Ty;
1346  do {
1347  if (ElementTy->isPointerTy())
1348  break;
1349 
1350  if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
1351  ElementTy = ArrayTy->getElementType();
1352  Indices.push_back(IRB.getIntN(PtrSize, 0));
1353  } else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
1354  ElementTy = VectorTy->getElementType();
1355  Indices.push_back(IRB.getInt32(0));
1356  } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
1357  if (STy->element_begin() == STy->element_end())
1358  break; // Nothing left to descend into.
1359  ElementTy = *STy->element_begin();
1360  Indices.push_back(IRB.getInt32(0));
1361  } else {
1362  break;
1363  }
1364  ++NumLayers;
1365  } while (ElementTy != TargetTy);
1366  if (ElementTy != TargetTy)
1367  Indices.erase(Indices.end() - NumLayers, Indices.end());
1368 
1369  return buildGEP(IRB, BasePtr, Indices, NamePrefix);
1370 }
1371 
1372 /// \brief Recursively compute indices for a natural GEP.
1373 ///
1374 /// This is the recursive step for getNaturalGEPWithOffset that walks down the
1375 /// element types adding appropriate indices for the GEP.
1376 static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
1377  Value *Ptr, Type *Ty, APInt &Offset,
1378  Type *TargetTy,
1379  SmallVectorImpl<Value *> &Indices,
1380  Twine NamePrefix) {
1381  if (Offset == 0)
1382  return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices,
1383  NamePrefix);
1384 
1385  // We can't recurse through pointer types.
1386  if (Ty->isPointerTy())
1387  return nullptr;
1388 
1389  // We try to analyze GEPs over vectors here, but note that these GEPs are
1390  // extremely poorly defined currently. The long-term goal is to remove GEPing
1391  // over a vector from the IR completely.
1392  if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
1393  unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType());
1394  if (ElementSizeInBits % 8 != 0) {
1395  // GEPs over non-multiple of 8 size vector elements are invalid.
1396  return nullptr;
1397  }
1398  APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
1399  APInt NumSkippedElements = Offset.sdiv(ElementSize);
1400  if (NumSkippedElements.ugt(VecTy->getNumElements()))
1401  return nullptr;
1402  Offset -= NumSkippedElements * ElementSize;
1403  Indices.push_back(IRB.getInt(NumSkippedElements));
1404  return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
1405  Offset, TargetTy, Indices, NamePrefix);
1406  }
1407 
1408  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
1409  Type *ElementTy = ArrTy->getElementType();
1410  APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
1411  APInt NumSkippedElements = Offset.sdiv(ElementSize);
1412  if (NumSkippedElements.ugt(ArrTy->getNumElements()))
1413  return nullptr;
1414 
1415  Offset -= NumSkippedElements * ElementSize;
1416  Indices.push_back(IRB.getInt(NumSkippedElements));
1417  return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
1418  Indices, NamePrefix);
1419  }
1420 
1421  StructType *STy = dyn_cast<StructType>(Ty);
1422  if (!STy)
1423  return nullptr;
1424 
1425  const StructLayout *SL = DL.getStructLayout(STy);
1426  uint64_t StructOffset = Offset.getZExtValue();
1427  if (StructOffset >= SL->getSizeInBytes())
1428  return nullptr;
1429  unsigned Index = SL->getElementContainingOffset(StructOffset);
1430  Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
1431  Type *ElementTy = STy->getElementType(Index);
1432  if (Offset.uge(DL.getTypeAllocSize(ElementTy)))
1433  return nullptr; // The offset points into alignment padding.
1434 
1435  Indices.push_back(IRB.getInt32(Index));
1436  return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
1437  Indices, NamePrefix);
1438 }
1439 
1440 /// \brief Get a natural GEP from a base pointer to a particular offset and
1441 /// resulting in a particular type.
1442 ///
1443 /// The goal is to produce a "natural" looking GEP that works with the existing
1444 /// composite types to arrive at the appropriate offset and element type for
1445 /// a pointer. TargetTy is the element type the returned GEP should point-to if
1446 /// possible. We recurse by decreasing Offset, adding the appropriate index to
1447 /// Indices, and setting Ty to the result subtype.
1448 ///
1449 /// If no natural GEP can be constructed, this function returns null.
1450 static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
1451  Value *Ptr, APInt Offset, Type *TargetTy,
1452  SmallVectorImpl<Value *> &Indices,
1453  Twine NamePrefix) {
1454  PointerType *Ty = cast<PointerType>(Ptr->getType());
1455 
1456  // Don't consider any GEPs through an i8* as natural unless the TargetTy is
1457  // an i8.
1458  if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
1459  return nullptr;
1460 
1461  Type *ElementTy = Ty->getElementType();
1462  if (!ElementTy->isSized())
1463  return nullptr; // We can't GEP through an unsized element.
1464  APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
1465  if (ElementSize == 0)
1466  return nullptr; // Zero-length arrays can't help us build a natural GEP.
1467  APInt NumSkippedElements = Offset.sdiv(ElementSize);
1468 
1469  Offset -= NumSkippedElements * ElementSize;
1470  Indices.push_back(IRB.getInt(NumSkippedElements));
1471  return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
1472  Indices, NamePrefix);
1473 }
1474 
1475 /// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
1476 /// resulting pointer has PointerTy.
1477 ///
1478 /// This tries very hard to compute a "natural" GEP which arrives at the offset
1479 /// and produces the pointer type desired. Where it cannot, it will try to use
1480 /// the natural GEP to arrive at the offset and bitcast to the type. Where that
1481 /// fails, it will try to use an existing i8* and GEP to the byte offset and
1482 /// bitcast to the type.
1483 ///
1484 /// The strategy for finding the more natural GEPs is to peel off layers of the
1485 /// pointer, walking back through bit casts and GEPs, searching for a base
1486 /// pointer from which we can compute a natural GEP with the desired
1487 /// properties. The algorithm tries to fold as many constant indices into
1488 /// a single GEP as possible, thus making each GEP more independent of the
1489 /// surrounding code.
1490 static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
1491  APInt Offset, Type *PointerTy, Twine NamePrefix) {
1492  // Even though we don't look through PHI nodes, we could be called on an
1493  // instruction in an unreachable block, which may be on a cycle.
1494  SmallPtrSet<Value *, 4> Visited;
1495  Visited.insert(Ptr);
1496  SmallVector<Value *, 4> Indices;
1497 
1498  // We may end up computing an offset pointer that has the wrong type. If we
1499  // never are able to compute one directly that has the correct type, we'll
1500  // fall back to it, so keep it and the base it was computed from around here.
1501  Value *OffsetPtr = nullptr;
1502  Value *OffsetBasePtr;
1503 
1504  // Remember any i8 pointer we come across to re-use if we need to do a raw
1505  // byte offset.
1506  Value *Int8Ptr = nullptr;
1507  APInt Int8PtrOffset(Offset.getBitWidth(), 0);
1508 
1509  Type *TargetTy = PointerTy->getPointerElementType();
1510 
1511  do {
1512  // First fold any existing GEPs into the offset.
1513  while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
1514  APInt GEPOffset(Offset.getBitWidth(), 0);
1515  if (!GEP->accumulateConstantOffset(DL, GEPOffset))
1516  break;
1517  Offset += GEPOffset;
1518  Ptr = GEP->getPointerOperand();
1519  if (!Visited.insert(Ptr).second)
1520  break;
1521  }
1522 
1523  // See if we can perform a natural GEP here.
1524  Indices.clear();
1525  if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
1526  Indices, NamePrefix)) {
1527  // If we have a new natural pointer at the offset, clear out any old
1528  // offset pointer we computed. Unless it is the base pointer or
1529  // a non-instruction, we built a GEP we don't need. Zap it.
1530  if (OffsetPtr && OffsetPtr != OffsetBasePtr)
1531  if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) {
1532  assert(I->use_empty() && "Built a GEP with uses somehow!");
1533  I->eraseFromParent();
1534  }
1535  OffsetPtr = P;
1536  OffsetBasePtr = Ptr;
1537  // If we also found a pointer of the right type, we're done.
1538  if (P->getType() == PointerTy)
1539  return P;
1540  }
1541 
1542  // Stash this pointer if we've found an i8*.
1543  if (Ptr->getType()->isIntegerTy(8)) {
1544  Int8Ptr = Ptr;
1545  Int8PtrOffset = Offset;
1546  }
1547 
1548  // Peel off a layer of the pointer and update the offset appropriately.
1549  if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
1550  Ptr = cast<Operator>(Ptr)->getOperand(0);
1551  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
1552  if (GA->isInterposable())
1553  break;
1554  Ptr = GA->getAliasee();
1555  } else {
1556  break;
1557  }
1558  assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
1559  } while (Visited.insert(Ptr).second);
1560 
1561  if (!OffsetPtr) {
1562  if (!Int8Ptr) {
1563  Int8Ptr = IRB.CreateBitCast(
1564  Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()),
1565  NamePrefix + "sroa_raw_cast");
1566  Int8PtrOffset = Offset;
1567  }
1568 
1569  OffsetPtr = Int8PtrOffset == 0
1570  ? Int8Ptr
1571  : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
1572  IRB.getInt(Int8PtrOffset),
1573  NamePrefix + "sroa_raw_idx");
1574  }
1575  Ptr = OffsetPtr;
1576 
1577  // On the off chance we were targeting i8*, guard the bitcast here.
1578  if (Ptr->getType() != PointerTy)
1579  Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast");
1580 
1581  return Ptr;
1582 }
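// Illustrative example (not from the original SROA.cpp; value names and types
// are hypothetical): suppose Ptr is %p of type { i32, i64 }*, Offset is 8, and
// PointerTy is i64*. With the usual layout the i64 field sits at byte 8, so a
// natural GEP reaches the offset directly:
//   %p.sroa_idx = getelementptr inbounds { i32, i64 }, { i32, i64 }* %p,
//                                         i64 0, i32 1
// If no in-type path to the offset existed, the fallback at the end of the
// function would produce the raw byte form instead:
//   %p.sroa_raw_cast = bitcast { i32, i64 }* %p to i8*
//   %p.sroa_raw_idx = getelementptr inbounds i8, i8* %p.sroa_raw_cast, i64 8
//   %p.sroa_cast = bitcast i8* %p.sroa_raw_idx to i64*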
1583 
1584 /// \brief Compute the adjusted alignment for a load or store from an offset.
1585 static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset,
1586  const DataLayout &DL) {
1587  unsigned Alignment;
1588  Type *Ty;
1589  if (auto *LI = dyn_cast<LoadInst>(I)) {
1590  Alignment = LI->getAlignment();
1591  Ty = LI->getType();
1592  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
1593  Alignment = SI->getAlignment();
1594  Ty = SI->getValueOperand()->getType();
1595  } else {
1596  llvm_unreachable("Only loads and stores are allowed!");
1597  }
1598 
1599  if (!Alignment)
1600  Alignment = DL.getABITypeAlignment(Ty);
1601 
1602  return MinAlign(Alignment, Offset);
1603 }
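// Illustrative sketch (not from the original SROA.cpp): MinAlign yields the
// largest power of two dividing both the base alignment and the byte offset,
// i.e. the strongest alignment still guaranteed after displacing the access.
// A standalone rendition, assuming it behaves like llvm::MinAlign:

#include <cstdint>

static uint64_t minAlign(uint64_t A, uint64_t B) {
  // Lowest set bit of (A | B): the largest power of two dividing both values.
  return (A | B) & (~(A | B) + 1);
}

// Example: a load from a 16-byte aligned base at byte offset 12 can only be
// assumed 4-byte aligned, since minAlign(16, 12) == 4.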
1604 
1605 /// \brief Test whether we can convert a value from the old to the new type.
1606 ///
1607 /// This predicate should be used to guard calls to convertValue in order to
1608 /// ensure that we only try to convert viable values. The strategy is that we
1609 /// will peel off single element struct and array wrappings to get to an
1610 /// underlying value, and convert that value.
1611 static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
1612  if (OldTy == NewTy)
1613  return true;
1614 
1615  // For integer types, we can't handle any bit-width differences. This would
1616  // break both vector conversions with extension and introduce endianness
1617  // issues when used in conjunction with loads and stores.
1618  if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1619  assert(cast<IntegerType>(OldTy)->getBitWidth() !=
1620  cast<IntegerType>(NewTy)->getBitWidth() &&
1621  "We can't have the same bitwidth for different int types");
1622  return false;
1623  }
1624 
1625  if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
1626  return false;
1627  if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
1628  return false;
1629 
1630  // We can convert pointers to integers and vice-versa. Same for vectors
1631  // of pointers and integers.
1632  OldTy = OldTy->getScalarType();
1633  NewTy = NewTy->getScalarType();
1634  if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
1635  if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
1636  return cast<PointerType>(NewTy)->getPointerAddressSpace() ==
1637  cast<PointerType>(OldTy)->getPointerAddressSpace();
1638  }
1639  if (NewTy->isIntegerTy() || OldTy->isIntegerTy())
1640  return true;
1641  return false;
1642  }
1643 
1644  return true;
1645 }
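// Illustrative examples (not from the original SROA.cpp), assuming a typical
// 64-bit DataLayout: i64 <-> double is convertible (equal size, both single
// value types, neither is a pointer); i8* <-> i64 is convertible because
// equal-sized pointer/integer round-trips are allowed; i32 -> i64 is rejected
// up front because the integer widths differ; and i8* -> i8 addrspace(1)* is
// rejected because the address spaces differ.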
1646 
1647 /// \brief Generic routine to convert an SSA value to a value of a different
1648 /// type.
1649 ///
1650 /// This will try various different casting techniques, such as bitcasts,
1651 /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
1652 /// two types for viability with this routine.
1653 static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
1654  Type *NewTy) {
1655  Type *OldTy = V->getType();
1656  assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
1657 
1658  if (OldTy == NewTy)
1659  return V;
1660 
1661  assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
1662  "Integer types must be the exact same to convert.");
1663 
1664  // See if we need inttoptr for this type pair. A cast involving both scalars
1665  // and vectors requires an additional bitcast.
1666  if (OldTy->getScalarType()->isIntegerTy() &&
1667  NewTy->getScalarType()->isPointerTy()) {
1668  // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
1669  if (OldTy->isVectorTy() && !NewTy->isVectorTy())
1670  return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
1671  NewTy);
1672 
1673  // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
1674  if (!OldTy->isVectorTy() && NewTy->isVectorTy())
1675  return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
1676  NewTy);
1677 
1678  return IRB.CreateIntToPtr(V, NewTy);
1679  }
1680 
1681  // See if we need ptrtoint for this type pair. A cast involving both scalars
1682  // and vectors requires an additional bitcast.
1683  if (OldTy->getScalarType()->isPointerTy() &&
1684  NewTy->getScalarType()->isIntegerTy()) {
1685  // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
1686  if (OldTy->isVectorTy() && !NewTy->isVectorTy())
1687  return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
1688  NewTy);
1689 
1690  // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
1691  if (!OldTy->isVectorTy() && NewTy->isVectorTy())
1692  return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
1693  NewTy);
1694 
1695  return IRB.CreatePtrToInt(V, NewTy);
1696  }
1697 
1698  return IRB.CreateBitCast(V, NewTy);
1699 }
1700 
1701 /// \brief Test whether the given slice use can be promoted to a vector.
1702 ///
1703 /// This function is called to test each entry in a partition which is slated
1704 /// for a single slice.
1705 static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
1706  VectorType *Ty,
1707  uint64_t ElementSize,
1708  const DataLayout &DL) {
1709  // First validate the slice offsets.
1710  uint64_t BeginOffset =
1711  std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
1712  uint64_t BeginIndex = BeginOffset / ElementSize;
1713  if (BeginIndex * ElementSize != BeginOffset ||
1714  BeginIndex >= Ty->getNumElements())
1715  return false;
1716  uint64_t EndOffset =
1717  std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
1718  uint64_t EndIndex = EndOffset / ElementSize;
1719  if (EndIndex * ElementSize != EndOffset || EndIndex > Ty->getNumElements())
1720  return false;
1721 
1722  assert(EndIndex > BeginIndex && "Empty vector!");
1723  uint64_t NumElements = EndIndex - BeginIndex;
1724  Type *SliceTy = (NumElements == 1)
1725  ? Ty->getElementType()
1726  : VectorType::get(Ty->getElementType(), NumElements);
1727 
1728  Type *SplitIntTy =
1729  Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
1730 
1731  Use *U = S.getUse();
1732 
1733  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
1734  if (MI->isVolatile())
1735  return false;
1736  if (!S.isSplittable())
1737  return false; // Skip any unsplittable intrinsics.
1738  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
1739  if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
1740  II->getIntrinsicID() != Intrinsic::lifetime_end)
1741  return false;
1742  } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
1743  // Disable vector promotion when there are loads or stores of an FCA.
1744  return false;
1745  } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1746  if (LI->isVolatile())
1747  return false;
1748  Type *LTy = LI->getType();
1749  if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
1750  assert(LTy->isIntegerTy());
1751  LTy = SplitIntTy;
1752  }
1753  if (!canConvertValue(DL, SliceTy, LTy))
1754  return false;
1755  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1756  if (SI->isVolatile())
1757  return false;
1758  Type *STy = SI->getValueOperand()->getType();
1759  if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
1760  assert(STy->isIntegerTy());
1761  STy = SplitIntTy;
1762  }
1763  if (!canConvertValue(DL, STy, SliceTy))
1764  return false;
1765  } else {
1766  return false;
1767  }
1768 
1769  return true;
1770 }
1771 
1772 /// \brief Test whether the given alloca partitioning and range of slices can be
1773 /// promoted to a vector.
1774 ///
1775 /// This is a quick test to check whether we can rewrite a particular alloca
1776 /// partition (and its newly formed alloca) into a vector alloca with only
1777 /// whole-vector loads and stores such that it could be promoted to a vector
1778 /// SSA value. We only can ensure this for a limited set of operations, and we
1779 /// don't want to do the rewrites unless we are confident that the result will
1780 /// be promotable, so we have an early test here.
1781 static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
1782  // Collect the candidate types for vector-based promotion. Also track whether
1783  // we have different element types.
1784  SmallVector<VectorType *, 4> CandidateTys;
1785  Type *CommonEltTy = nullptr;
1786  bool HaveCommonEltTy = true;
1787  auto CheckCandidateType = [&](Type *Ty) {
1788  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
1789  CandidateTys.push_back(VTy);
1790  if (!CommonEltTy)
1791  CommonEltTy = VTy->getElementType();
1792  else if (CommonEltTy != VTy->getElementType())
1793  HaveCommonEltTy = false;
1794  }
1795  };
1796  // Consider any loads or stores that are the exact size of the slice.
1797  for (const Slice &S : P)
1798  if (S.beginOffset() == P.beginOffset() &&
1799  S.endOffset() == P.endOffset()) {
1800  if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
1801  CheckCandidateType(LI->getType());
1802  else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
1803  CheckCandidateType(SI->getValueOperand()->getType());
1804  }
1805 
1806  // If we didn't find a vector type, nothing to do here.
1807  if (CandidateTys.empty())
1808  return nullptr;
1809 
1810  // Remove non-integer vector types if the candidates did not share a common
1810  // element type.
1811  // FIXME: It'd be nice to replace them with integer vector types, but we can't
1812  // do that until all the backends are known to produce good code for all
1813  // integer vector types.
1814  if (!HaveCommonEltTy) {
1815  CandidateTys.erase(remove_if(CandidateTys,
1816  [](VectorType *VTy) {
1817  return !VTy->getElementType()->isIntegerTy();
1818  }),
1819  CandidateTys.end());
1820 
1821  // If there were no integer vector types, give up.
1822  if (CandidateTys.empty())
1823  return nullptr;
1824 
1825  // Rank the remaining candidate vector types. This is easy because we know
1826  // they're all integer vectors. We sort by ascending number of elements.
1827  auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
1828  assert(DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) &&
1829  "Cannot have vector types of different sizes!");
1830  assert(RHSTy->getElementType()->isIntegerTy() &&
1831  "All non-integer types eliminated!");
1832  assert(LHSTy->getElementType()->isIntegerTy() &&
1833  "All non-integer types eliminated!");
1834  return RHSTy->getNumElements() < LHSTy->getNumElements();
1835  };
1836  std::sort(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes);
1837  CandidateTys.erase(
1838  std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
1839  CandidateTys.end());
1840  } else {
1841 // The only way to have the same element type in every vector type is to
1842 // have the same vector type. Check that and remove all but one.
1843 #ifndef NDEBUG
1844  for (VectorType *VTy : CandidateTys) {
1845  assert(VTy->getElementType() == CommonEltTy &&
1846  "Unaccounted for element type!");
1847  assert(VTy == CandidateTys[0] &&
1848  "Different vector types with the same element type!");
1849  }
1850 #endif
1851  CandidateTys.resize(1);
1852  }
1853 
1854  // Try each vector type, and return the one which works.
1855  auto CheckVectorTypeForPromotion = [&](VectorType *VTy) {
1856  uint64_t ElementSize = DL.getTypeSizeInBits(VTy->getElementType());
1857 
1858  // While the definition of LLVM vectors is bitpacked, we don't support sizes
1859  // that aren't byte sized.
1860  if (ElementSize % 8)
1861  return false;
1862  assert((DL.getTypeSizeInBits(VTy) % 8) == 0 &&
1863  "vector size not a multiple of element size?");
1864  ElementSize /= 8;
1865 
1866  for (const Slice &S : P)
1867  if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL))
1868  return false;
1869 
1870  for (const Slice *S : P.splitSliceTails())
1871  if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL))
1872  return false;
1873 
1874  return true;
1875  };
1876  for (VectorType *VTy : CandidateTys)
1877  if (CheckVectorTypeForPromotion(VTy))
1878  return VTy;
1879 
1880  return nullptr;
1881 }
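// Illustrative sketch (not from the original SROA.cpp): the per-slice test
// reduces to the slice's begin and end offsets landing exactly on element
// boundaries of the candidate vector type. A standalone rendition of that
// boundary check, with a hypothetical helper name and plain integers:

#include <cstdint>

static bool sliceMapsToElements(uint64_t Begin, uint64_t End,
                                uint64_t ElementSize, uint64_t NumElements) {
  uint64_t BeginIndex = Begin / ElementSize;
  uint64_t EndIndex = End / ElementSize;
  return End > Begin && BeginIndex * ElementSize == Begin &&
         EndIndex * ElementSize == End && EndIndex <= NumElements;
}

// Example: with the 4-byte elements of a <4 x i32>, the slice [4, 8) maps
// cleanly onto element 1, while [2, 8) straddles an element boundary and
// causes the candidate type to be rejected.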
1882 
1883 /// \brief Test whether a slice of an alloca is valid for integer widening.
1884 ///
1885 /// This implements the necessary checking for the \c isIntegerWideningViable
1886 /// test below on a single slice of the alloca.
1887 static bool isIntegerWideningViableForSlice(const Slice &S,
1888  uint64_t AllocBeginOffset,
1889  Type *AllocaTy,
1890  const DataLayout &DL,
1891  bool &WholeAllocaOp) {
1892  uint64_t Size = DL.getTypeStoreSize(AllocaTy);
1893 
1894  uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
1895  uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
1896 
1897  // We can't reasonably handle cases where the load or store extends past
1898  // the end of the alloca's type and into its padding.
1899  if (RelEnd > Size)
1900  return false;
1901 
1902  Use *U = S.getUse();
1903 
1904  if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1905  if (LI->isVolatile())
1906  return false;
1907  // We can't handle loads that extend past the allocated memory.
1908  if (DL.getTypeStoreSize(LI->getType()) > Size)
1909  return false;
1910  // Note that we don't count vector loads or stores as whole-alloca
1911  // operations which enable integer widening because we would prefer to use
1912  // vector widening instead.
1913  if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
1914  WholeAllocaOp = true;
1915  if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
1916  if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy))
1917  return false;
1918  } else if (RelBegin != 0 || RelEnd != Size ||
1919  !canConvertValue(DL, AllocaTy, LI->getType())) {
1920  // Non-integer loads need to be convertible from the alloca type so that
1921  // they are promotable.
1922  return false;
1923  }
1924  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1925  Type *ValueTy = SI->getValueOperand()->getType();
1926  if (SI->isVolatile())
1927  return false;
1928  // We can't handle stores that extend past the allocated memory.
1929  if (DL.getTypeStoreSize(ValueTy) > Size)
1930  return false;
1931  // Note that we don't count vector loads or stores as whole-alloca
1932  // operations which enable integer widening because we would prefer to use
1933  // vector widening instead.
1934  if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
1935  WholeAllocaOp = true;
1936  if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
1937  if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy))
1938  return false;
1939  } else if (RelBegin != 0 || RelEnd != Size ||
1940  !canConvertValue(DL, ValueTy, AllocaTy)) {
1941  // Non-integer stores need to be convertible to the alloca type so that
1942  // they are promotable.
1943  return false;
1944  }
1945  } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
1946  if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
1947  return false;
1948  if (!S.isSplittable())
1949  return false; // Skip any unsplittable intrinsics.
1950  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
1951  if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
1952  II->getIntrinsicID() != Intrinsic::lifetime_end)
1953  return false;
1954  } else {
1955  return false;
1956  }
1957 
1958  return true;
1959 }
1960 
1961 /// \brief Test whether the given alloca partition's integer operations can be
1962 /// widened to promotable ones.
1963 ///
1964 /// This is a quick test to check whether we can rewrite the integer loads and
1965 /// stores to a particular alloca into wider loads and stores and be able to
1966 /// promote the resulting alloca.
1967 static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
1968  const DataLayout &DL) {
1969  uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy);
1970  // Don't create integer types larger than the maximum bitwidth.
1971  if (SizeInBits > IntegerType::MAX_INT_BITS)
1972  return false;
1973 
1974  // Don't try to handle allocas with bit-padding.
1975  if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy))
1976  return false;
1977 
1978  // We need to ensure that an integer type with the appropriate bitwidth can
1979  // be converted to the alloca type, whatever that is. We don't want to force
1980  // the alloca itself to have an integer type if there is a more suitable one.
1981  Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
1982  if (!canConvertValue(DL, AllocaTy, IntTy) ||
1983  !canConvertValue(DL, IntTy, AllocaTy))
1984  return false;
1985 
1986  // While examining uses, we ensure that the alloca has a covering load or
1987  // store. We don't want to widen the integer operations only to fail to
1988  // promote due to some other unsplittable entry (which we may make splittable
1989  // later). However, if there are only splittable uses, go ahead and assume
1990  // that we cover the alloca.
1991  // FIXME: We shouldn't consider split slices that happen to start in the
1992  // partition here...
1993  bool WholeAllocaOp =
1994  P.begin() != P.end() ? false : DL.isLegalInteger(SizeInBits);
1995 
1996  for (const Slice &S : P)
1997  if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
1998  WholeAllocaOp))
1999  return false;
2000 
2001  for (const Slice *S : P.splitSliceTails())
2002  if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
2003  WholeAllocaOp))
2004  return false;
2005 
2006  return WholeAllocaOp;
2007 }
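// Illustrative example (not from the original SROA.cpp): a 4-byte
// "alloca float" written by one i32 store covering the whole alloca and read
// back by i16 loads at offsets 0 and 2 passes this test: float and i32 are
// mutually convertible, the covering i32 store sets WholeAllocaOp, and each
// narrow i16 load fits within the 4-byte store size. If the only accesses were
// the two i16 loads, WholeAllocaOp would remain false and widening would be
// skipped.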
2008 
2009 static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2010  IntegerType *Ty, uint64_t Offset,
2011  const Twine &Name) {
2012  DEBUG(dbgs() << " start: " << *V << "\n");
2013  IntegerType *IntTy = cast<IntegerType>(V->getType());
2014  assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
2015  "Element extends past full value");
2016  uint64_t ShAmt = 8 * Offset;
2017  if (DL.isBigEndian())
2018  ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
2019  if (ShAmt) {
2020  V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2021  DEBUG(dbgs() << " shifted: " << *V << "\n");
2022  }
2023  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2024  "Cannot extract to a larger integer!");
2025  if (Ty != IntTy) {
2026  V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2027  DEBUG(dbgs() << " trunced: " << *V << "\n");
2028  }
2029  return V;
2030 }
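// Illustrative sketch (not from the original SROA.cpp): the same
// shift-then-truncate arithmetic on plain integers, using a hypothetical
// helper in place of the IR-level LShr/Trunc above:

#include <cstdint>

static uint64_t extractBytes(uint64_t Wide, unsigned WideBytes,
                             unsigned NarrowBytes, unsigned Offset,
                             bool BigEndian) {
  unsigned ShAmt = 8 * Offset;
  if (BigEndian)
    ShAmt = 8 * (WideBytes - NarrowBytes - Offset);
  uint64_t V = Wide >> ShAmt;                     // the LShr step
  if (NarrowBytes < 8)
    V &= (UINT64_C(1) << (8 * NarrowBytes)) - 1;  // the Trunc step
  return V;
}

// Example: extractBytes(0x11223344, 4, 2, 1, /*BigEndian=*/false) == 0x2233,
// the two bytes starting one byte above the least significant end.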
2031 
2032 static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
2033  Value *V, uint64_t Offset, const Twine &Name) {
2034  IntegerType *IntTy = cast<IntegerType>(Old->getType());
2035  IntegerType *Ty = cast<IntegerType>(V->getType());
2036  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2037  "Cannot insert a larger integer!");
2038  DEBUG(dbgs() << " start: " << *V << "\n");
2039  if (Ty != IntTy) {
2040  V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2041  DEBUG(dbgs() << " extended: " << *V << "\n");
2042  }
2043  assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
2044  "Element store outside of alloca store");
2045  uint64_t ShAmt = 8 * Offset;
2046  if (DL.isBigEndian())
2047  ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
2048  if (ShAmt) {
2049  V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2050  DEBUG(dbgs() << " shifted: " << *V << "\n");
2051  }
2052 
2053  if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2054  APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2055  Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2056  DEBUG(dbgs() << " masked: " << *Old << "\n");
2057  V = IRB.CreateOr(Old, V, Name + ".insert");
2058  DEBUG(dbgs() << " inserted: " << *V << "\n");
2059  }
2060  return V;
2061 }
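// Illustrative sketch (not from the original SROA.cpp): the matching insertion
// arithmetic, i.e. widen the small value, shift it into position, clear the
// destination bytes with a mask, and OR the pieces together (hypothetical
// helper, plain integers):

#include <cstdint>

static uint64_t insertBytes(uint64_t Old, uint64_t V, unsigned WideBytes,
                            unsigned NarrowBytes, unsigned Offset,
                            bool BigEndian) {
  unsigned ShAmt = 8 * Offset;
  if (BigEndian)
    ShAmt = 8 * (WideBytes - NarrowBytes - Offset);
  uint64_t FieldMask =
      NarrowBytes < 8 ? ((UINT64_C(1) << (8 * NarrowBytes)) - 1) << ShAmt
                      : ~UINT64_C(0);
  return (Old & ~FieldMask) | ((V << ShAmt) & FieldMask); // mask, then insert
}

// Example: insertBytes(0x11223344, 0xAABB, 4, 2, 1, false) == 0x11AABB44.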
2062 
2063 static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
2064  unsigned EndIndex, const Twine &Name) {
2065  VectorType *VecTy = cast<VectorType>(V->getType());
2066  unsigned NumElements = EndIndex - BeginIndex;
2067  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2068 
2069  if (NumElements == VecTy->getNumElements())
2070  return V;
2071 
2072  if (NumElements == 1) {
2073  V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2074  Name + ".extract");
2075  DEBUG(dbgs() << " extract: " << *V << "\n");
2076  return V;
2077  }
2078 
2079  SmallVector<Constant *, 8> Mask;
2080  Mask.reserve(NumElements);
2081  for (unsigned i = BeginIndex; i != EndIndex; ++i)
2082  Mask.push_back(IRB.getInt32(i));
2083  V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
2084  ConstantVector::get(Mask), Name + ".extract");
2085  DEBUG(dbgs() << " shuffle: " << *V << "\n");
2086  return V;
2087 }
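// Illustrative example (not from the original SROA.cpp; value names are
// hypothetical): extracting elements [1, 3) from a <4 x i32> value %v yields
//   %vec.extract = shufflevector <4 x i32> %v, <4 x i32> undef,
//                                <2 x i32> <i32 1, i32 2>
// i.e. a shuffle mask listing exactly the element indices being kept.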
2088 
2089 static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
2090  unsigned BeginIndex, const Twine &Name) {
2091  VectorType *VecTy = cast<VectorType>(Old->getType());
2092  assert(VecTy && "Can only insert a vector into a vector");
2093 
2094  VectorType *Ty = dyn_cast<VectorType>(V->getType());
2095  if (!Ty) {
2096  // Single element to insert.
2097  V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2098  Name + ".insert");
2099  DEBUG(dbgs() << " insert: " << *V << "\n");
2100  return V;
2101  }
2102 
2103  assert(Ty->getNumElements() <= VecTy->getNumElements() &&
2104  "Too many elements!");
2105  if (Ty->getNumElements() == VecTy->getNumElements()) {
2106  assert(V->getType() == VecTy && "Vector type mismatch");
2107  return V;
2108  }
2109  unsigned EndIndex = BeginIndex + Ty->getNumElements();
2110 
2111  // When inserting a smaller vector into the larger to store, we first
2112  // use a shuffle vector to widen it with undef elements, and then
2113  // a second shuffle vector to select between the loaded vector and the
2114  // incoming vector.
2115  SmallVector<Constant *, 8> Mask;
2116  Mask.reserve(VecTy->getNumElements());
2117  for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
2118  if (i >= BeginIndex && i < EndIndex)
2119  Mask.push_back(IRB.getInt32(i - BeginIndex));
2120  else
2121  Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
2122  V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
2123  ConstantVector::get(Mask), Name + ".expand");
2124  DEBUG(dbgs() << " shuffle: " << *V << "\n");
2125 
2126  Mask.clear();
2127  for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
2128  Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2129 
2130  V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend");
2131 
2132  DEBUG(dbgs() << " blend: " << *V << "\n");
2133  return V;
2134 }
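// Illustrative example (not from the original SROA.cpp; value names are
// hypothetical): inserting a <2 x i32> %v into elements [1, 3) of a <4 x i32>
// %old first widens %v with undef lanes,
//   %vec.expand = shufflevector <2 x i32> %v, <2 x i32> undef,
//                               <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
// and then blends lane by lane between the expanded value and %old:
//   %vecblend = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>,
//                      <4 x i32> %vec.expand, <4 x i32> %old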
2135 
2136 /// \brief Visitor to rewrite instructions using a particular slice of an alloca
2137 /// to use a new alloca.
2138 ///
2139 /// Also implements the rewriting to vector-based accesses when the partition
2140 /// passes the isVectorPromotionViable predicate. Most of the rewriting logic
2141 /// lives here.
2142 class llvm::sroa::AllocaSliceRewriter
2143  : public InstVisitor<AllocaSliceRewriter, bool> {
2144  // Befriend the base class so it can delegate to private visit methods.
2145  friend class llvm::InstVisitor<AllocaSliceRewriter, bool>;
2146  typedef llvm::InstVisitor<AllocaSliceRewriter, bool> Base;
2147 
2148  const DataLayout &DL;
2149  AllocaSlices &AS;
2150  SROA &Pass;
2151  AllocaInst &OldAI, &NewAI;
2152  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2153  Type *NewAllocaTy;
2154 
2155  // This is a convenience and flag variable that will be null unless the new
2156  // alloca's integer operations should be widened to this integer type due to
2157  // passing isIntegerWideningViable above. If it is non-null, the desired
2158  // integer type will be stored here for easy access during rewriting.
2159  IntegerType *IntTy;
2160 
2161  // If we are rewriting an alloca partition which can be written as pure
2162  // vector operations, we stash extra information here. When VecTy is
2163  // non-null, we have some strict guarantees about the rewritten alloca:
2164  // - The new alloca is exactly the size of the vector type here.
2165  // - The accesses all either map to the entire vector or to a single
2166  // element.
2167  // - The set of accessing instructions is only one of those handled above
2168  // in isVectorPromotionViable. Generally these are the same access kinds
2169  // which are promotable via mem2reg.
2170  VectorType *VecTy;
2171  Type *ElementTy;
2172  uint64_t ElementSize;
2173 
2174  // The original offset of the slice currently being rewritten relative to
2175  // the original alloca.
2176  uint64_t BeginOffset, EndOffset;
2177  // The new offsets of the slice currently being rewritten relative to the
2178  // original alloca.
2179  uint64_t NewBeginOffset, NewEndOffset;
2180 
2181  uint64_t SliceSize;
2182  bool IsSplittable;
2183  bool IsSplit;
2184  Use *OldUse;
2185  Instruction *OldPtr;
2186 
2187  // Track post-rewrite users which are PHI nodes and Selects.
2188  SmallPtrSetImpl<PHINode *> &PHIUsers;
2189  SmallPtrSetImpl<SelectInst *> &SelectUsers;
2190 
2191  // Utility IR builder, whose name prefix is set up for each visited use, and
2192  // the insertion point is set to point to the user.
2193  IRBuilderTy IRB;
2194 
2195 public:
2196  AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
2197  AllocaInst &OldAI, AllocaInst &NewAI,
2198  uint64_t NewAllocaBeginOffset,
2199  uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
2200  VectorType *PromotableVecTy,
2201  SmallPtrSetImpl<PHINode *> &PHIUsers,
2202  SmallPtrSetImpl<SelectInst *> &SelectUsers)
2203  : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2204  NewAllocaBeginOffset(NewAllocaBeginOffset),
2205  NewAllocaEndOffset(NewAllocaEndOffset),
2206  NewAllocaTy(NewAI.getAllocatedType()),
2207  IntTy(IsIntegerPromotable
2208  ? Type::getIntNTy(
2209  NewAI.getContext(),
2210  DL.getTypeSizeInBits(NewAI.getAllocatedType()))
2211  : nullptr),
2212  VecTy(PromotableVecTy),
2213  ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2214  ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
2215  BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
2216  OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2217  IRB(NewAI.getContext(), ConstantFolder()) {
2218  if (VecTy) {
2219  assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
2220  "Only multiple-of-8 sized vector elements are viable");
2221  ++NumVectorized;
2222  }
2223  assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2224  }
2225 
2226  bool visit(AllocaSlices::const_iterator I) {
2227  bool CanSROA = true;
2228  BeginOffset = I->beginOffset();
2229  EndOffset = I->endOffset();
2230  IsSplittable = I->isSplittable();
2231  IsSplit =
2232  BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2233  DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
2234  DEBUG(AS.printSlice(dbgs(), I, ""));
2235  DEBUG(dbgs() << "\n");
2236 
2237  // Compute the intersecting offset range.
2238  assert(BeginOffset < NewAllocaEndOffset);
2239  assert(EndOffset > NewAllocaBeginOffset);
2240  NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2241  NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2242 
2243  SliceSize = NewEndOffset - NewBeginOffset;
2244 
2245  OldUse = I->getUse();
2246  OldPtr = cast<Instruction>(OldUse->get());
2247 
2248  Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2249  IRB.SetInsertPoint(OldUserI);
2250  IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2251  IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + ".");
2252 
2253  CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
2254  if (VecTy || IntTy)
2255  assert(CanSROA);
2256  return CanSROA;
2257  }
2258 
2259 private:
2260  // Make sure the other visit overloads are visible.
2261  using Base::visit;
2262 
2263  // Every instruction which can end up as a user must have a rewrite rule.
2264  bool visitInstruction(Instruction &I) {
2265  DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
2266  llvm_unreachable("No rewrite rule for this instruction!");
2267  }
2268 
2269  Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
2270  // Note that the offset computation can use BeginOffset or NewBeginOffset
2271  // interchangeably for unsplit slices.
2272  assert(IsSplit || BeginOffset == NewBeginOffset);
2273  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2274 
2275 #ifndef NDEBUG
2276  StringRef OldName = OldPtr->getName();
2277  // Skip through the last '.sroa.' component of the name.
2278  size_t LastSROAPrefix = OldName.rfind(".sroa.");
2279  if (LastSROAPrefix != StringRef::npos) {
2280  OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
2281  // Look for an SROA slice index.
2282  size_t IndexEnd = OldName.find_first_not_of("0123456789");
2283  if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
2284  // Strip the index and look for the offset.
2285  OldName = OldName.substr(IndexEnd + 1);
2286  size_t OffsetEnd = OldName.find_first_not_of("0123456789");
2287  if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
2288  // Strip the offset.
2289  OldName = OldName.substr(OffsetEnd + 1);
2290  }
2291  }
2292  // Strip any SROA suffixes as well.
2293  OldName = OldName.substr(0, OldName.find(".sroa_"));
2294 #endif
2295 
2296  return getAdjustedPtr(IRB, DL, &NewAI,
2297  APInt(DL.getPointerSizeInBits(), Offset), PointerTy,
2298 #ifndef NDEBUG
2299  Twine(OldName) + "."
2300 #else
2301  Twine()
2302 #endif
2303  );
2304  }
2305 
2306  /// \brief Compute suitable alignment to access this slice of the *new*
2307  /// alloca.
2308  ///
2309  /// You can optionally pass a type to this routine and if that type's ABI
2310  /// alignment is itself suitable, this will return zero.
2311  unsigned getSliceAlign(Type *Ty = nullptr) {
2312  unsigned NewAIAlign = NewAI.getAlignment();
2313  if (!NewAIAlign)
2314  NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType());
2315  unsigned Align =
2316  MinAlign(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
2317  return (Ty && Align == DL.getABITypeAlignment(Ty)) ? 0 : Align;
2318  }
2319 
2320  unsigned getIndex(uint64_t Offset) {
2321  assert(VecTy && "Can only call getIndex when rewriting a vector");
2322  uint64_t RelOffset = Offset - NewAllocaBeginOffset;
2323  assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
2324  uint32_t Index = RelOffset / ElementSize;
2325  assert(Index * ElementSize == RelOffset);
2326  return Index;
2327  }
2328 
2329  void deleteIfTriviallyDead(Value *V) {
2330  Instruction *I = cast<Instruction>(V);
2331  if (isInstructionTriviallyDead(I))
2332  Pass.DeadInsts.insert(I);
2333  }
2334 
2335  Value *rewriteVectorizedLoadInst() {
2336  unsigned BeginIndex = getIndex(NewBeginOffset);
2337  unsigned EndIndex = getIndex(NewEndOffset);
2338  assert(EndIndex > BeginIndex && "Empty vector!");
2339 
2340  Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
2341  return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
2342  }
2343 
2344  Value *rewriteIntegerLoad(LoadInst &LI) {
2345  assert(IntTy && "We cannot insert an integer to the alloca");
2346  assert(!LI.isVolatile());
2347  Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
2348  V = convertValue(DL, IRB, V, IntTy);
2349  assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
2350  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2351  if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
2352  IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
2353  V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
2354  }
2355  // It is possible that the extracted type is not the load type. This
2356  // happens if there is a load past the end of the alloca, and as
2357  // a consequence the slice is narrower but still a candidate for integer
2358  // lowering. To handle this case, we just zero extend the extracted
2359  // integer.
2360  assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
2361  "Can only handle an extract for an overly wide load");
2362  if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
2363  V = IRB.CreateZExt(V, LI.getType());
2364  return V;
2365  }
2366 
2367  bool visitLoadInst(LoadInst &LI) {
2368  DEBUG(dbgs() << " original: " << LI << "\n");
2369  Value *OldOp = LI.getOperand(0);
2370  assert(OldOp == OldPtr);
2371 
2372  Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
2373  : LI.getType();
2374  const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize;
2375  bool IsPtrAdjusted = false;
2376  Value *V;
2377  if (VecTy) {
2378  V = rewriteVectorizedLoadInst();
2379  } else if (IntTy && LI.getType()->isIntegerTy()) {
2380  V = rewriteIntegerLoad(LI);
2381  } else if (NewBeginOffset == NewAllocaBeginOffset &&
2382  NewEndOffset == NewAllocaEndOffset &&
2383  (canConvertValue(DL, NewAllocaTy, TargetTy) ||
2384  (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
2385  TargetTy->isIntegerTy()))) {
2386  LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
2387  LI.isVolatile(), LI.getName());
2388  if (LI.isVolatile())
2389  NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
2390  V = NewLI;
2391 
2392  // If this is an integer load past the end of the slice (which means the
2393  // bytes outside the slice are undef or this load is dead) just forcibly
2394  // fix the integer size with correct handling of endianness.
2395  if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2396  if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
2397  if (AITy->getBitWidth() < TITy->getBitWidth()) {
2398  V = IRB.CreateZExt(V, TITy, "load.ext");
2399  if (DL.isBigEndian())
2400  V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
2401  "endian_shift");
2402  }
2403  } else {
2404  Type *LTy = TargetTy->getPointerTo();
2405  LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
2406  getSliceAlign(TargetTy),
2407  LI.isVolatile(), LI.getName());
2408  if (LI.isVolatile())
2409  NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
2410 
2411  V = NewLI;
2412  IsPtrAdjusted = true;
2413  }
2414  V = convertValue(DL, IRB, V, TargetTy);
2415 
2416  if (IsSplit) {
2417  assert(!LI.isVolatile());
2418  assert(LI.getType()->isIntegerTy() &&
2419  "Only integer type loads and stores are split");
2420  assert(SliceSize < DL.getTypeStoreSize(LI.getType()) &&
2421  "Split load isn't smaller than original load");
2422  assert(LI.getType()->getIntegerBitWidth() ==
2423  DL.getTypeStoreSizeInBits(LI.getType()) &&
2424  "Non-byte-multiple bit width");
2425  // Move the insertion point just past the load so that we can refer to it.
2426  IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI)));
2427  // Create a placeholder value with the same type as LI to use as the
2428  // basis for the new value. This allows us to replace the uses of LI with
2429  // the computed value, and then replace the placeholder with LI, leaving
2430  // LI only used for this computation.
2431  Value *Placeholder =
2432  new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
2433  V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
2434  "insert");
2435  LI.replaceAllUsesWith(V);
2436  Placeholder->replaceAllUsesWith(&LI);
2437  delete Placeholder;
2438  } else {
2439  LI.replaceAllUsesWith(V);
2440  }
2441 
2442  Pass.DeadInsts.insert(&LI);
2443  deleteIfTriviallyDead(OldOp);
2444  DEBUG(dbgs() << " to: " << *V << "\n");
2445  return !LI.isVolatile() && !IsPtrAdjusted;
2446  }
2447 
2448  bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) {
2449  if (V->getType() != VecTy) {
2450  unsigned BeginIndex = getIndex(NewBeginOffset);
2451  unsigned EndIndex = getIndex(NewEndOffset);
2452  assert(EndIndex > BeginIndex && "Empty vector!");
2453  unsigned NumElements = EndIndex - BeginIndex;
2454  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2455  Type *SliceTy = (NumElements == 1)
2456  ? ElementTy
2457  : VectorType::get(ElementTy, NumElements);
2458  if (V->getType() != SliceTy)
2459  V = convertValue(DL, IRB, V, SliceTy);
2460 
2461  // Mix in the existing elements.
2462  Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
2463  V = insertVector(IRB, Old, V, BeginIndex, "vec");
2464  }
2465  StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
2466  Pass.DeadInsts.insert(&SI);
2467 
2468  (void)Store;
2469  DEBUG(dbgs() << " to: " << *Store << "\n");
2470  return true;
2471  }
2472 
2473  bool rewriteIntegerStore(Value *V, StoreInst &SI) {
2474  assert(IntTy && "We cannot extract an integer from the alloca");
2475  assert(!SI.isVolatile());
2476  if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
2477  Value *Old =
2478  IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
2479  Old = convertValue(DL, IRB, Old, IntTy);
2480  assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
2481  uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
2482  V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert");
2483  }
2484  V = convertValue(DL, IRB, V, NewAllocaTy);
2485  StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
2486  Store->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
2487  Pass.DeadInsts.insert(&SI);
2488  DEBUG(dbgs() << " to: " << *Store << "\n");
2489  return true;
2490  }
2491 
2492  bool visitStoreInst(StoreInst &SI) {
2493  DEBUG(dbgs() << " original: " << SI << "\n");
2494  Value *OldOp = SI.getOperand(1);
2495  assert(OldOp == OldPtr);
2496 
2497  Value *V = SI.getValueOperand();
2498 
2499  // Strip all inbounds GEPs and pointer casts to try to dig out any root
2500  // alloca that should be re-examined after promoting this alloca.
2501  if (V->getType()->isPointerTy())
2502  if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
2503  Pass.PostPromotionWorklist.insert(AI);
2504 
2505  if (SliceSize < DL.getTypeStoreSize(V->getType())) {
2506  assert(!SI.isVolatile());
2507  assert(V->getType()->isIntegerTy() &&
2508  "Only integer type loads and stores are split");
2509  assert(V->getType()->getIntegerBitWidth() ==
2510  DL.getTypeStoreSizeInBits(V->getType()) &&
2511  "Non-byte-multiple bit width");
2512  IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
2513  V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
2514  "extract");
2515  }
2516 
2517  if (VecTy)
2518  return rewriteVectorizedStoreInst(V, SI, OldOp);
2519  if (IntTy && V->getType()->isIntegerTy())
2520  return rewriteIntegerStore(V, SI);
2521 
2522  const bool IsStorePastEnd = DL.getTypeStoreSize(V->getType()) > SliceSize;
2523  StoreInst *NewSI;
2524  if (NewBeginOffset == NewAllocaBeginOffset &&
2525  NewEndOffset == NewAllocaEndOffset &&
2526  (canConvertValue(DL, V->getType(), NewAllocaTy) ||
2527  (IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
2528  V->getType()->isIntegerTy()))) {
2529  // If this is an integer store past the end of the slice (and thus the bytes
2530  // past that point are irrelevant or this is unreachable), truncate the
2531  // value prior to storing.
2532  if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
2533  if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2534  if (VITy->getBitWidth() > AITy->getBitWidth()) {
2535  if (DL.isBigEndian())
2536  V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
2537  "endian_shift");
2538  V = IRB.CreateTrunc(V, AITy, "load.trunc");
2539  }
2540 
2541  V = convertValue(DL, IRB, V, NewAllocaTy);
2542  NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
2543  SI.isVolatile());
2544  } else {
2545  Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo());
2546  NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
2547  SI.isVolatile());
2548  }
2549  NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
2550  if (SI.isVolatile())
2551  NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope());
2552  Pass.DeadInsts.insert(&SI);
2553  deleteIfTriviallyDead(OldOp);
2554 
2555  DEBUG(dbgs() << " to: " << *NewSI << "\n");
2556  return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
2557  }
2558 
2559  /// \brief Compute an integer value from splatting an i8 across the given
2560  /// number of bytes.
2561  ///
2562  /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
2563  /// call this routine.
2564  /// FIXME: Heed the advice above.
2565  ///
2566  /// \param V The i8 value to splat.
2567  /// \param Size The number of bytes in the output (assuming i8 is one byte)
2568  Value *getIntegerSplat(Value *V, unsigned Size) {
2569  assert(Size > 0 && "Expected a positive number of bytes.");
2570  IntegerType *VTy = cast<IntegerType>(V->getType());
2571  assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
2572  if (Size == 1)
2573  return V;
2574 
2575  Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
2576  V = IRB.CreateMul(
2577  IRB.CreateZExt(V, SplatIntTy, "zext"),
2578  ConstantExpr::getUDiv(
2579  Constant::getAllOnesValue(SplatIntTy),
2580  ConstantExpr::getZExt(Constant::getAllOnesValue(V->getType()),
2581  SplatIntTy)),
2582  "isplat");
2583  return V;
2584  }
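// Illustrative example (not from the original SROA.cpp): for Size == 4 the
// constant operand folds to 0xFFFFFFFF / 0xFF == 0x01010101, so splatting the
// byte 0xAB computes zext(0xAB) * 0x01010101 == 0xABABABAB, repeating the byte
// across every byte of the wider integer.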
2585 
2586  /// \brief Compute a vector splat for a given element value.
2587  Value *getVectorSplat(Value *V, unsigned NumElements) {
2588  V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
2589  DEBUG(dbgs() << " splat: " << *V << "\n");
2590  return V;
2591  }
2592 
2593  bool visitMemSetInst(MemSetInst &II) {
2594  DEBUG(dbgs() << " original: " << II << "\n");
2595  assert(II.getRawDest() == OldPtr);
2596 
2597  // If the memset has a variable size, it cannot be split, just adjust the
2598  // pointer to the new alloca.
2599  if (!isa<Constant>(II.getLength())) {
2600  assert(!IsSplit);
2601  assert(NewBeginOffset == BeginOffset);
2602  II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
2603  Type *CstTy = II.getAlignmentCst()->getType();
2604  II.setAlignment(ConstantInt::get(CstTy, getSliceAlign()));
2605 
2606  deleteIfTriviallyDead(OldPtr);
2607  return false;
2608  }
2609 
2610  // Record this instruction for deletion.
2611  Pass.DeadInsts.insert(&II);
2612 
2613  Type *AllocaTy = NewAI.getAllocatedType();
2614  Type *ScalarTy = AllocaTy->getScalarType();
2615 
2616  // If this doesn't map cleanly onto the alloca type, and that type isn't
2617  // a single value type, just emit a memset.
2618  if (!VecTy && !IntTy &&
2619  (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
2620  SliceSize != DL.getTypeStoreSize(AllocaTy) ||
2621  !AllocaTy->isSingleValueType() ||
2622  !DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy)) ||
2623  DL.getTypeSizeInBits(ScalarTy) % 8 != 0)) {
2624  Type *SizeTy = II.getLength()->getType();
2625  Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
2626  CallInst *New = IRB.CreateMemSet(
2627  getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
2628  getSliceAlign(), II.isVolatile());
2629  (void)New;
2630  DEBUG(dbgs() << " to: " << *New << "\n");
2631  return false;
2632  }
2633 
2634  // If we can represent this as a simple value, we have to build the actual
2635  // value to store, which requires expanding the byte present in memset to
2636  // a sensible representation for the alloca type. This is essentially
2637  // splatting the byte to a sufficiently wide integer, splatting it across
2638  // any desired vector width, and bitcasting to the final type.
2639  Value *V;
2640 
2641  if (VecTy) {
2642  // If this is a memset of a vectorized alloca, insert it.
2643  assert(ElementTy == ScalarTy);
2644 
2645  unsigned BeginIndex = getIndex(NewBeginOffset);
2646  unsigned EndIndex = getIndex(NewEndOffset);
2647  assert(EndIndex > BeginIndex && "Empty vector!");
2648  unsigned NumElements = EndIndex - BeginIndex;
2649  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2650 
2651  Value *Splat =
2652  getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ElementTy) / 8);
2653  Splat = convertValue(DL, IRB, Splat, ElementTy);
2654  if (NumElements > 1)
2655  Splat = getVectorSplat(Splat, NumElements);
2656 
2657  Value *Old =
2658  IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
2659  V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
2660  } else if (IntTy) {
2661  // If this is a memset on an alloca where we can widen stores, insert the
2662  // set integer.
2663  assert(!II.isVolatile());
2664 
2665  uint64_t Size = NewEndOffset - NewBeginOffset;
2666  V = getIntegerSplat(II.getValue(), Size);
2667 
2668  if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
2669  EndOffset != NewAllocaEndOffset)) {
2670  Value *Old =
2671  IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
2672  Old = convertValue(DL, IRB, Old, IntTy);
2673  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2674  V = insertInteger(DL, IRB, Old, V, Offset, "insert");
2675  } else {
2676  assert(V->getType() == IntTy &&
2677  "Wrong type for an alloca wide integer!");
2678  }
2679  V = convertValue(DL, IRB, V, AllocaTy);
2680  } else {
2681  // Established these invariants above.
2682  assert(NewBeginOffset == NewAllocaBeginOffset);
2683  assert(NewEndOffset == NewAllocaEndOffset);
2684 
2685  V = getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ScalarTy) / 8);
2686  if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
2687  V = getVectorSplat(V, AllocaVecTy->getNumElements());
2688 
2689  V = convertValue(DL, IRB, V, AllocaTy);
2690  }
2691 
2692  Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
2693  II.isVolatile());
2694  (void)New;
2695  DEBUG(dbgs() << " to: " << *New << "\n");
2696  return !II.isVolatile();
2697  }
2698 
2699  bool visitMemTransferInst(MemTransferInst &II) {
2700  // Rewriting of memory transfer instructions can be a bit tricky. We break
2701  // them into two categories: split intrinsics and unsplit intrinsics.
2702 
2703  DEBUG(dbgs() << " original: " << II << "\n");
2704 
2705  bool IsDest = &II.getRawDestUse() == OldUse;
2706  assert((IsDest && II.getRawDest() == OldPtr) ||
2707  (!IsDest && II.getRawSource() == OldPtr));
2708 
2709  unsigned SliceAlign = getSliceAlign();
2710 
2711  // For unsplit intrinsics, we simply modify the source and destination
2712  // pointers in place. This isn't just an optimization, it is a matter of
2713  // correctness. With unsplit intrinsics we may be dealing with transfers
2714  // within a single alloca before SROA ran, or with transfers that have
2715  // a variable length. We may also be dealing with memmove instead of
2716  // memcpy, and so simply updating the pointers is all that is necessary for us to
2717  // update both source and dest of a single call.
2718  if (!IsSplittable) {
2719  Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
2720  if (IsDest)
2721  II.setDest(AdjustedPtr);
2722  else
2723  II.setSource(AdjustedPtr);
2724 
2725  if (II.getAlignment() > SliceAlign) {
2726  Type *CstTy = II.getAlignmentCst()->getType();
2727  II.setAlignment(
2728  ConstantInt::get(CstTy, MinAlign(II.getAlignment(), SliceAlign)));
2729  }
2730 
2731  DEBUG(dbgs() << " to: " << II << "\n");
2732  deleteIfTriviallyDead(OldPtr);
2733  return false;
2734  }
2735  // For split transfer intrinsics we have an incredibly useful assurance:
2736  // the source and destination do not reside within the same alloca, and at
2737  // least one of them does not escape. This means that we can replace
2738  // memmove with memcpy, and we don't need to worry about all manner of
2739  // downsides to splitting and transforming the operations.
2740 
2741  // If this doesn't map cleanly onto the alloca type, and that type isn't
2742  // a single value type, just emit a memcpy.
2743  bool EmitMemCpy =
2744  !VecTy && !IntTy &&
2745  (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
2746  SliceSize != DL.getTypeStoreSize(NewAI.getAllocatedType()) ||
2747  !NewAI.getAllocatedType()->isSingleValueType());
2748 
2749  // If we're just going to emit a memcpy, the alloca hasn't changed, and the
2750  // size hasn't been shrunk based on analysis of the viable range, this is
2751  // a no-op.
2752  if (EmitMemCpy && &OldAI == &NewAI) {
2753  // Ensure the start lines up.
2754  assert(NewBeginOffset == BeginOffset);
2755 
2756  // Rewrite the size as needed.
2757  if (NewEndOffset != EndOffset)
2758  II.setLength(ConstantInt::get(II.getLength()->getType(),
2759  NewEndOffset - NewBeginOffset));
2760  return false;
2761  }
2762  // Record this instruction for deletion.
2763  Pass.DeadInsts.insert(&II);
2764 
2765  // Strip all inbounds GEPs and pointer casts to try to dig out any root
2766  // alloca that should be re-examined after rewriting this instruction.
2767  Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
2768  if (AllocaInst *AI =
2769  dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
2770  assert(AI != &OldAI && AI != &NewAI &&
2771  "Splittable transfers cannot reach the same alloca on both ends.");
2772  Pass.Worklist.insert(AI);
2773  }
2774 
2775  Type *OtherPtrTy = OtherPtr->getType();
2776  unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
2777 
2778  // Compute the relative offset for the other pointer within the transfer.
2779  unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS);
2780  APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
2781  unsigned OtherAlign = MinAlign(II.getAlignment() ? II.getAlignment() : 1,
2782  OtherOffset.zextOrTrunc(64).getZExtValue());
2783 
2784  if (EmitMemCpy) {
2785  // Compute the other pointer, folding as much as possible to produce
2786  // a single, simple GEP in most cases.
2787  OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
2788  OtherPtr->getName() + ".");
2789 
2790  Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
2791  Type *SizeTy = II.getLength()->getType();
2792  Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
2793 
2794  CallInst *New = IRB.CreateMemCpy(
2795  IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, Size,
2796  MinAlign(SliceAlign, OtherAlign), II.isVolatile());
2797  (void)New;
2798  DEBUG(dbgs() << " to: " << *New << "\n");
2799  return false;
2800  }
2801 
2802  bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
2803  NewEndOffset == NewAllocaEndOffset;
2804  uint64_t Size = NewEndOffset - NewBeginOffset;
2805  unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
2806  unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
2807  unsigned NumElements = EndIndex - BeginIndex;
2808  IntegerType *SubIntTy =
2809  IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
2810 
2811  // Reset the other pointer type to match the register type we're going to
2812  // use, but using the address space of the original other pointer.
2813  if (VecTy && !IsWholeAlloca) {
2814  if (NumElements == 1)
2815  OtherPtrTy = VecTy->getElementType();
2816  else
2817  OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
2818 
2819  OtherPtrTy = OtherPtrTy->getPointerTo(OtherAS);
2820  } else if (IntTy && !IsWholeAlloca) {
2821  OtherPtrTy = SubIntTy->getPointerTo(OtherAS);
2822  } else {
2823  OtherPtrTy = NewAllocaTy->getPointerTo(OtherAS);
2824  }
2825 
2826  Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
2827  OtherPtr->getName() + ".");
2828  unsigned SrcAlign = OtherAlign;
2829  Value *DstPtr = &NewAI;
2830  unsigned DstAlign = SliceAlign;
2831  if (!IsDest) {
2832  std::swap(SrcPtr, DstPtr);
2833  std::swap(SrcAlign, DstAlign);
2834  }
2835 
2836  Value *Src;
2837  if (VecTy && !IsWholeAlloca && !IsDest) {
2838  Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
2839  Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
2840  } else if (IntTy && !IsWholeAlloca && !IsDest) {
2841  Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
2842  Src = convertValue(DL, IRB, Src, IntTy);
2843  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2844  Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
2845  } else {
2846  Src =
2847  IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.isVolatile(), "copyload");
2848  }
2849 
2850  if (VecTy && !IsWholeAlloca && IsDest) {
2851  Value *Old =
2852  IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
2853  Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
2854  } else if (IntTy && !IsWholeAlloca && IsDest) {
2855  Value *Old =
2856  IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
2857  Old = convertValue(DL, IRB, Old, IntTy);
2858  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2859  Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
2860  Src = convertValue(DL, IRB, Src, NewAllocaTy);
2861  }
2862 
2863  StoreInst *Store = cast<StoreInst>(
2864  IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
2865  (void)Store;
2866  DEBUG(dbgs() << " to: " << *Store << "\n");
2867  return !II.isVolatile();
2868  }
2869 
2870  bool visitIntrinsicInst(IntrinsicInst &II) {
2871  assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
2872  II.getIntrinsicID() == Intrinsic::lifetime_end);
2873  DEBUG(dbgs() << " original: " << II << "\n");
2874  assert(II.getArgOperand(1) == OldPtr);
2875 
2876  // Record this instruction for deletion.
2877  Pass.DeadInsts.insert(&II);
2878 
2879  // Lifetime intrinsics are only promotable if they cover the whole alloca.
2880  // Therefore, we drop lifetime intrinsics which don't cover the whole
2881  // alloca.
2882  // (In theory, intrinsics which partially cover an alloca could be
2883  // promoted, but PromoteMemToReg doesn't handle that case.)
2884  // FIXME: Check whether the alloca is promotable before dropping the
2885  // lifetime intrinsics?
2886  if (NewBeginOffset != NewAllocaBeginOffset ||
2887  NewEndOffset != NewAllocaEndOffset)
2888  return true;
2889 
2890  ConstantInt *Size =
2891  ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
2892  NewEndOffset - NewBeginOffset);
2893  Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
2894  Value *New;
2895  if (II.getIntrinsicID() == Intrinsic::lifetime_start)
2896  New = IRB.CreateLifetimeStart(Ptr, Size);
2897  else
2898  New = IRB.CreateLifetimeEnd(Ptr, Size);
2899 
2900  (void)New;
2901  DEBUG(dbgs() << " to: " << *New << "\n");
2902 
2903  return true;
2904  }
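  // As a hypothetical illustration of the rewrite above: a marker covering the
  // whole original alloca, e.g.
  //
  //   call void @llvm.lifetime.start(i64 12, i8* %old.ptr)
  //
  // is re-emitted against the (possibly smaller) new alloca slice as
  //
  //   call void @llvm.lifetime.start(i64 4, i8* %new.slice.ptr)
  //
  // with the size constant recomputed from the rewritten slice bounds. The
  // pointer names are made up for the example.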
2905 
2906  bool visitPHINode(PHINode &PN) {
2907  DEBUG(dbgs() << " original: " << PN << "\n");
2908  assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
2909  assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
2910 
2911  // We would like to compute a new pointer in only one place, but have it be
2912  // as local as possible to the PHI. To do that, we re-use the location of
2913  // the old pointer, which necessarily must be in the right position to
2914  // dominate the PHI.
2915  IRBuilderTy PtrBuilder(IRB);
2916  if (isa<PHINode>(OldPtr))
2917  PtrBuilder.SetInsertPoint(&*OldPtr->getParent()->getFirstInsertionPt());
2918  else
2919  PtrBuilder.SetInsertPoint(OldPtr);
2920  PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
2921 
2922  Value *NewPtr = getNewAllocaSlicePtr(PtrBuilder, OldPtr->getType());
2923  // Replace the operands which were using the old pointer.
2924  std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
2925 
2926  DEBUG(dbgs() << " to: " << PN << "\n");
2927  deleteIfTriviallyDead(OldPtr);
2928 
2929  // PHIs can't be promoted on their own, but often can be speculated. We
2930  // check the speculation outside of the rewriter so that we see the
2931  // fully-rewritten alloca.
2932  PHIUsers.insert(&PN);
2933  return true;
2934  }
2935 
2936  bool visitSelectInst(SelectInst &SI) {
2937  DEBUG(dbgs() << " original: " << SI << "\n");
2938  assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
2939  "Pointer isn't an operand!");
2940  assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
2941  assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
2942 
2943  Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
2944  // Replace the operands which were using the old pointer.
2945  if (SI.getOperand(1) == OldPtr)
2946  SI.setOperand(1, NewPtr);
2947  if (SI.getOperand(2) == OldPtr)
2948  SI.setOperand(2, NewPtr);
2949 
2950  DEBUG(dbgs() << " to: " << SI << "\n");
2951  deleteIfTriviallyDead(OldPtr);
2952 
2953  // Selects can't be promoted on their own, but often can be speculated. We
2954  // check the speculation outside of the rewriter so that we see the
2955  // fully-rewritten alloca.
2956  SelectUsers.insert(&SI);
2957  return true;
2958  }
2959 };
2960 
2961 namespace {
2962 /// \brief Visitor to rewrite aggregate loads and stores as scalar.
2963 ///
2964 /// This pass aggressively rewrites all aggregate loads and stores on
2965 /// a particular pointer (or any pointer derived from it which we can identify)
2966 /// with scalar loads and stores.
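///
/// As an illustrative sketch (hypothetical IR and value names, written in the
/// old-style syntax used by the other example comments in this file), an
/// aggregate load such as
///
///   %agg = load {i32, float}* %p
///
/// is rewritten into per-element operations roughly like
///
///   %gep0 = getelementptr inbounds {i32, float}* %p, i32 0, i32 0
///   %elt0 = load i32* %gep0
///   %agg0 = insertvalue {i32, float} undef, i32 %elt0, 0
///   %gep1 = getelementptr inbounds {i32, float}* %p, i32 0, i32 1
///   %elt1 = load float* %gep1
///   %agg1 = insertvalue {i32, float} %agg0, float %elt1, 1
///
/// and aggregate stores are split symmetrically using extractvalue.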
2967 class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
2968  // Befriend the base class so it can delegate to private visit methods.
2969  friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;
2970 
2971  /// Queue of pointer uses to analyze and potentially rewrite.
2972  SmallVector<Use *, 8> Queue;
2973 
2974  /// Set to prevent us from cycling with phi nodes and loops.
2975  SmallPtrSet<User *, 8> Visited;
2976 
2977  /// The current pointer use being rewritten. This is used to dig up the used
2978  /// value (as opposed to the user).
2979  Use *U;
2980 
2981 public:
2982  /// Rewrite loads and stores through a pointer and all pointers derived from
2983  /// it.
2984  bool rewrite(Instruction &I) {
2985  DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
2986  enqueueUsers(I);
2987  bool Changed = false;
2988  while (!Queue.empty()) {
2989  U = Queue.pop_back_val();
2990  Changed |= visit(cast<Instruction>(U->getUser()));
2991  }
2992  return Changed;
2993  }
2994 
2995 private:
2996  /// Enqueue all the users of the given instruction for further processing.
2997  /// This uses a set to de-duplicate users.
2998  void enqueueUsers(Instruction &I) {
2999  for (Use &U : I.uses())
3000  if (Visited.insert(U.getUser()).second)
3001  Queue.push_back(&U);
3002  }
3003 
3004  // Conservative default is to not rewrite anything.
3005  bool visitInstruction(Instruction &I) { return false; }
3006 
3007  /// \brief Generic recursive split emission class.
3008  template <typename Derived> class OpSplitter {
3009  protected:
3010  /// The builder used to form new instructions.
3011  IRBuilderTy IRB;
3012  /// The indices to be used with insertvalue or extractvalue to select the
3013  /// appropriate value within the aggregate.
3014  SmallVector<unsigned, 4> Indices;
3015  /// The indices to a GEP instruction which will move Ptr to the correct slot
3016  /// within the aggregate.
3017  SmallVector<Value *, 4> GEPIndices;
3018  /// The base pointer of the original op, used as a base for GEPing the
3019  /// split operations.
3020  Value *Ptr;
3021 
3022  /// Initialize the splitter with an insertion point, Ptr and start with a
3023  /// single zero GEP index.
3024  OpSplitter(Instruction *InsertionPoint, Value *Ptr)
3025  : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {}
3026 
3027  public:
3028  /// \brief Generic recursive split emission routine.
3029  ///
3030  /// This method recursively splits an aggregate op (load or store) into
3031  /// scalar or vector ops. It splits recursively until it hits a single value
3032  /// and emits that single value operation via the template argument.
3033  ///
3034  /// The logic of this routine relies on GEPs and insertvalue and
3035  /// extractvalue all operating with the same fundamental index list, merely
3036  /// formatted differently (GEPs need actual values).
3037  ///
3038  /// \param Ty The type being split recursively into smaller ops.
3039  /// \param Agg The aggregate value being built up or stored, depending on
3040  /// whether this is splitting a load or a store respectively.
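  ///
  /// For example (a hypothetical illustration, not tied to any test case):
  /// when splitting a store of type {i32, {float, float}}, the second float
  /// is reached with Indices = {1, 1} for extractvalue, while the GEP uses
  /// the same list prefixed by the initial zero index:
  ///
  ///   %val = extractvalue {i32, {float, float}} %agg, 1, 1
  ///   %gep = getelementptr inbounds {i32, {float, float}}* %ptr, i32 0, i32 1, i32 1
  ///   store float %val, float* %gep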
3041  void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
3042  if (Ty->isSingleValueType())
3043  return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name);
3044 
3045  if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
3046  unsigned OldSize = Indices.size();
3047  (void)OldSize;
3048  for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
3049  ++Idx) {
3050  assert(Indices.size() == OldSize && "Did not return to the old size");
3051  Indices.push_back(Idx);
3052  GEPIndices.push_back(IRB.getInt32(Idx));
3053  emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
3054  GEPIndices.pop_back();
3055  Indices.pop_back();
3056  }
3057  return;
3058  }
3059 
3060  if (StructType *STy = dyn_cast<StructType>(Ty)) {
3061  unsigned OldSize = Indices.size();
3062  (void)OldSize;
3063  for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
3064  ++Idx) {
3065  assert(Indices.size() == OldSize && "Did not return to the old size");
3066  Indices.push_back(Idx);
3067  GEPIndices.push_back(IRB.getInt32(Idx));
3068  emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
3069  GEPIndices.pop_back();
3070  Indices.pop_back();
3071  }
3072  return;
3073  }
3074 
3075  llvm_unreachable("Only arrays and structs are aggregate loadable types");
3076  }
3077  };
3078 
3079  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
3080  LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr)
3081  : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr) {}
3082 
3083  /// Emit a leaf load of a single value. This is called at the leaves of the
3084  /// recursive emission to actually load values.
3085  void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
3086  assert(Ty->isSingleValueType());
3087  // Load the single value and insert it using the indices.
3088  Value *GEP =
3089  IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
3090  Value *Load = IRB.CreateLoad(GEP, Name + ".load");
3091  Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
3092  DEBUG(dbgs() << " to: " << *Load << "\n");
3093  }
3094  };
3095 
3096  bool visitLoadInst(LoadInst &LI) {
3097  assert(LI.getPointerOperand() == *U);
3098  if (!LI.isSimple() || LI.getType()->isSingleValueType())
3099  return false;
3100 
3101  // We have an aggregate being loaded, split it apart.
3102  DEBUG(dbgs() << " original: " << LI << "\n");
3103  LoadOpSplitter Splitter(&LI, *U);
3104  Value *V = UndefValue::get(LI.getType());
3105  Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
3106  LI.replaceAllUsesWith(V);
3107  LI.eraseFromParent();
3108  return true;
3109  }
3110 
3111  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
3112  StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr)
3113  : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr) {}
3114 
3115  /// Emit a leaf store of a single value. This is called at the leaves of the
3116  /// recursive emission to actually produce stores.
3117  void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
3118  assert(Ty->isSingleValueType());
3119  // Extract the single value and store it using the indices.
3120  //
3121  // The gep and extractvalue values are factored out of the CreateStore
3122  // call to make the output independent of the argument evaluation order.
3123  Value *ExtractValue =
3124  IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
3125  Value *InBoundsGEP =
3126  IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
3127  Value *Store = IRB.CreateStore(ExtractValue, InBoundsGEP);
3128  (void)Store;
3129  DEBUG(dbgs() << " to: " << *Store << "\n");
3130  }
3131  };
3132 
3133  bool visitStoreInst(StoreInst &SI) {
3134  if (!SI.isSimple() || SI.getPointerOperand() != *U)
3135  return false;
3136  Value *V = SI.getValueOperand();
3137  if (V->getType()->isSingleValueType())
3138  return false;
3139 
3140  // We have an aggregate being stored, split it apart.
3141  DEBUG(dbgs() << " original: " << SI << "\n");
3142  StoreOpSplitter Splitter(&SI, *U);
3143  Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
3144  SI.eraseFromParent();
3145  return true;
3146  }
3147 
3148  bool visitBitCastInst(BitCastInst &BC) {
3149  enqueueUsers(BC);
3150  return false;
3151  }
3152 
3153  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
3154  enqueueUsers(GEPI);
3155  return false;
3156  }
3157 
3158  bool visitPHINode(PHINode &PN) {
3159  enqueueUsers(PN);
3160  return false;
3161  }
3162 
3163  bool visitSelectInst(SelectInst &SI) {
3164  enqueueUsers(SI);
3165  return false;
3166  }
3167 };
3168 }
3169 
3170 /// \brief Strip aggregate type wrapping.
3171 ///
3172 /// This removes no-op aggregate types wrapping an underlying type. It will
3173 /// strip as many layers of types as it can without changing either the type
3174 /// size or the allocated size.
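///
/// For example (illustrative only): {{i32}} and [1 x i32] both strip down to
/// i32, since each wrapper layer has the same size and allocated size as the
/// inner type, while {i32, i32} is returned unchanged because its first
/// element covers only half of the allocation.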
3175 static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
3176  if (Ty->isSingleValueType())
3177  return Ty;
3178 
3179  uint64_t AllocSize = DL.getTypeAllocSize(Ty);
3180  uint64_t TypeSize = DL.getTypeSizeInBits(Ty);
3181 
3182  Type *InnerTy;
3183  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
3184  InnerTy = ArrTy->getElementType();
3185  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
3186  const StructLayout *SL = DL.getStructLayout(STy);
3187  unsigned Index = SL->getElementContainingOffset(0);
3188  InnerTy = STy->getElementType(Index);
3189  } else {
3190  return Ty;
3191  }
3192 
3193  if (AllocSize > DL.getTypeAllocSize(InnerTy) ||
3194  TypeSize > DL.getTypeSizeInBits(InnerTy))
3195  return Ty;
3196 
3197  return stripAggregateTypeWrapping(DL, InnerTy);
3198 }
3199 
3200 /// \brief Try to find a partition of the aggregate type passed in for a given
3201 /// offset and size.
3202 ///
3203 /// This recurses through the aggregate type and tries to compute a subtype
3204 /// based on the offset and size. When the offset and size span a sub-section
3205 /// of an array, it will even compute a new array type for that sub-section,
3206 /// and the same for structs.
3207 ///
3208 /// Note that this routine is very strict and tries to find a partition of the
3209 /// type which produces the *exact* right offset and size. It is not forgiving
3210 /// when the size or offset causes either end of the type-based partition to be off.
3211 /// Also, this is a best-effort routine. It is reasonable to give up and not
3212 /// return a type if necessary.
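///
/// A hypothetical example: for {i64, [4 x i32]} under a typical 64-bit data
/// layout, a query with Offset = 8 and Size = 8 descends into the array and
/// yields the sub-array type [2 x i32], whereas Offset = 4 with Size = 8
/// straddles the i64 and the array and so yields nullptr.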
3213 static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
3214  uint64_t Size) {
3215  if (Offset == 0 && DL.getTypeAllocSize(Ty) == Size)
3216  return stripAggregateTypeWrapping(DL, Ty);
3217  if (Offset > DL.getTypeAllocSize(Ty) ||
3218  (DL.getTypeAllocSize(Ty) - Offset) < Size)
3219  return nullptr;
3220 
3221  if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
3222  Type *ElementTy = SeqTy->getElementType();
3223  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
3224  uint64_t NumSkippedElements = Offset / ElementSize;
3225  if (NumSkippedElements >= SeqTy->getNumElements())
3226  return nullptr;
3227  Offset -= NumSkippedElements * ElementSize;
3228 
3229  // First check if we need to recurse.
3230  if (Offset > 0 || Size < ElementSize) {
3231  // Bail if the partition ends in a different array element.
3232  if ((Offset + Size) > ElementSize)
3233  return nullptr;
3234  // Recurse through the element type trying to peel off offset bytes.
3235  return getTypePartition(DL, ElementTy, Offset, Size);
3236  }
3237  assert(Offset == 0);
3238 
3239  if (Size == ElementSize)
3240  return stripAggregateTypeWrapping(DL, ElementTy);
3241  assert(Size > ElementSize);
3242  uint64_t NumElements = Size / ElementSize;
3243  if (NumElements * ElementSize != Size)
3244  return nullptr;
3245  return ArrayType::get(ElementTy, NumElements);
3246  }
3247 
3248  StructType *STy = dyn_cast<StructType>(Ty);
3249  if (!STy)
3250  return nullptr;
3251 
3252  const StructLayout *SL = DL.getStructLayout(STy);
3253  if (Offset >= SL->getSizeInBytes())
3254  return nullptr;
3255  uint64_t EndOffset = Offset + Size;
3256  if (EndOffset > SL->getSizeInBytes())
3257  return nullptr;
3258 
3259  unsigned Index = SL->getElementContainingOffset(Offset);
3260  Offset -= SL->getElementOffset(Index);
3261 
3262  Type *ElementTy = STy->getElementType(Index);
3263  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
3264  if (Offset >= ElementSize)
3265  return nullptr; // The offset points into alignment padding.
3266 
3267  // See if any partition must be contained by the element.
3268  if (Offset > 0 || Size < ElementSize) {
3269  if ((Offset + Size) > ElementSize)
3270  return nullptr;
3271  return getTypePartition(DL, ElementTy, Offset, Size);
3272  }
3273  assert(Offset == 0);
3274 
3275  if (Size == ElementSize)
3276  return stripAggregateTypeWrapping(DL, ElementTy);
3277 
3278  StructType::element_iterator EI = STy->element_begin() + Index,
3279  EE = STy->element_end();
3280  if (EndOffset < SL->getSizeInBytes()) {
3281  unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
3282  if (Index == EndIndex)
3283  return nullptr; // Within a single element and its padding.
3284 
3285  // Don't try to form "natural" types if the elements don't line up with the
3286  // expected size.
3287  // FIXME: We could potentially recurse down through the last element in the
3288  // sub-struct to find a natural end point.
3289  if (SL->getElementOffset(EndIndex) != EndOffset)
3290  return nullptr;
3291 
3292  assert(Index < EndIndex);
3293  EE = STy->element_begin() + EndIndex;
3294  }
3295 
3296  // Try to build up a sub-structure.
3297  StructType *SubTy =
3298  StructType::get(STy->getContext(), makeArrayRef(EI, EE), STy->isPacked());
3299  const StructLayout *SubSL = DL.getStructLayout(SubTy);
3300  if (Size != SubSL->getSizeInBytes())
3301  return nullptr; // The sub-struct doesn't have quite the size needed.
3302 
3303  return SubTy;
3304 }
3305 
3306 /// \brief Pre-split loads and stores to simplify rewriting.
3307 ///
3308 /// We want to break up the splittable load+store pairs as much as
3309 /// possible. This is important to do as a preprocessing step, as once we
3310 /// start rewriting the accesses to partitions of the alloca we lose the
3311 /// necessary information to correctly split apart paired loads and stores
3312 /// which both point into this alloca. The case to consider is something like
3313 /// the following:
3314 ///
3315 /// %a = alloca [12 x i8]
3316 /// %gep1 = getelementptr [12 x i8]* %a, i32 0, i32 0
3317 /// %gep2 = getelementptr [12 x i8]* %a, i32 0, i32 4
3318 /// %gep3 = getelementptr [12 x i8]* %a, i32 0, i32 8
3319 /// %iptr1 = bitcast i8* %gep1 to i64*
3320 /// %iptr2 = bitcast i8* %gep2 to i64*
3321 /// %fptr1 = bitcast i8* %gep1 to float*
3322 /// %fptr2 = bitcast i8* %gep2 to float*
3323 /// %fptr3 = bitcast i8* %gep3 to float*
3324 /// store float 0.0, float* %fptr1
3325 /// store float 1.0, float* %fptr2
3326 /// %v = load i64* %iptr1
3327 /// store i64 %v, i64* %iptr2
3328 /// %f1 = load float* %fptr2
3329 /// %f2 = load float* %fptr3
3330 ///
3331 /// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
3332 /// promote everything so we recover the 2 SSA values that should have been
3333 /// there all along.
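///
/// Continuing the example (a sketch of the intended result; the pointer and
/// value names are made up, the real rewriter derives them from the original
/// names), the paired i64 load and store are pre-split at the partition
/// boundaries into i32-sized pieces:
///
/// %v.0 = load i32* %iptr1.0        ; bytes [0,4) of %a
/// %v.1 = load i32* %iptr1.4        ; bytes [4,8) of %a
/// store i32 %v.0, i32* %iptr2.0    ; bytes [4,8) of %a
/// store i32 %v.1, i32* %iptr2.4    ; bytes [8,12) of %a
///
/// after which each 4-byte partition of %a can be rewritten and promoted
/// independently.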
3334 ///
3335 /// \returns true if any changes are made.
3336 bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
3337  DEBUG(dbgs() << "Pre-splitting loads and stores\n");
3338 
3339  // Track the loads and stores which are candidates for pre-splitting here, in
3340  // the order they first appear during the partition scan. These give stable
3341  // iteration order and a basis for tracking which loads and stores we
3342  // actually split.
3343  SmallVector<LoadInst *, 4> Loads;
3344  SmallVector<StoreInst *, 4> Stores;
3345 
3346  // We need to accumulate the splits required of each load or store where we
3347  // can find them via a direct lookup. This is important to cross-check loads
3348  // and stores against each other. We also track the slice so that we can kill
3349  // all the slices that end up split.
3350  struct SplitOffsets {
3351  Slice *S;
3352  std::vector<uint64_t> Splits;
3353  };
3354  SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
3355 
3356  // Track loads out of this alloca which cannot, for any reason, be pre-split.
3357  // This is important as we also cannot pre-split stores of those loads!
3358  // FIXME: This is all pretty gross. It means that we can be more aggressive
3359  // in pre-splitting when the load feeding the store happens to come from
3360  // a separate alloca. Put another way, the effectiveness of SROA would be
3361  // decreased by a frontend which just concatenated all of its local allocas
3362  // into one big flat alloca. But defeating such patterns is exactly the job
3363  // SROA is tasked with! Sadly, to not have this discrepancy we would have
3364  // to change store pre-splitting to actually force pre-splitting of the load
3365  // that feeds it *and all stores*. That makes pre-splitting much harder, but
3366  // maybe it would make it more principled?
3367  SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
3368 
3369  DEBUG(dbgs() << " Searching for candidate loads and stores\n");
3370  for (auto &P : AS.partitions()) {
3371  for (Slice &S : P) {
3372  Instruction *I = cast<Instruction>(S.getUse()->getUser());
3373  if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
3374  // If this is a load we have to track that it can't participate in any
3375  // pre-splitting. If this is a store of a load we have to track that
3376  // that load also can't participate in any pre-splitting.
3377  if (auto *LI = dyn_cast<LoadInst>(I))
3378  UnsplittableLoads.insert(LI);
3379  else if (auto *SI = dyn_cast<StoreInst>(I))
3380  if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
3381  UnsplittableLoads.insert(LI);
3382  continue;
3383  }
3384  assert(P.endOffset() > S.beginOffset() &&
3385  "Empty or backwards partition!");
3386 
3387  // Determine if this is a pre-splittable slice.
3388  if (auto *LI = dyn_cast<LoadInst>(I)) {
3389  assert(!LI->isVolatile() && "Cannot split volatile loads!");
3390 
3391  // The load must be used exclusively to store into other pointers for
3392  // us to be able to arbitrarily pre-split it. The stores must also be
3393  // simple to avoid changing semantics.
3394  auto IsLoadSimplyStored = [](LoadInst *LI) {
3395  for (User *LU : LI->users()) {
3396  auto *SI = dyn_cast<StoreInst>(LU);
3397  if (!SI || !SI->isSimple())
3398  return false;
3399  }
3400  return true;
3401  };
3402  if (!IsLoadSimplyStored(LI)) {
3403  UnsplittableLoads.insert(LI);
3404  continue;
3405  }
3406 
3407  Loads.push_back(LI);
3408  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
3409  if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
3410  // Skip stores *of* pointers. FIXME: This shouldn't even be possible!
3411  continue;
3412  auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
3413  if (!StoredLoad || !StoredLoad->isSimple())
3414  continue;
3415  assert(!SI->isVolatile() && "Cannot split volatile stores!");
3416 
3417  Stores.push_back(SI);
3418  } else {
3419  // Other uses cannot be pre-split.
3420  continue;
3421  }
3422 
3423  // Record the initial split.
3424  DEBUG(dbgs() << " Candidate: " << *I << "\n");
3425  auto &Offsets = SplitOffsetsMap[I];
3426  assert(Offsets.Splits.empty() &&
3427  "Should not have splits the first time we see an instruction!");
3428  Offsets.S = &S;
3429  Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
3430  }
3431 
3432  // Now scan the already split slices, and add a split for any of them which
3433  // we're going to pre-split.
3434  for (Slice *S : P.splitSliceTails()) {
3435  auto SplitOffsetsMapI =
3436  SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
3437  if (SplitOffsetsMapI == SplitOffsetsMap.end())
3438  continue;
3439  auto &Offsets = SplitOffsetsMapI->second;
3440 
3441  assert(Offsets.S == S && "Found a mismatched slice!");
3442  assert(!Offsets.Splits.empty() &&
3443  "Cannot have an empty set of splits on the second partition!");
3444  assert(Offsets.Splits.back() ==
3445  P.beginOffset() - Offsets.S->beginOffset() &&
3446  "Previous split does not end where this one begins!");
3447 
3448  // Record each split. The last partition's end isn't needed as the size
3449  // of the slice dictates that.
3450  if (S->endOffset() > P.endOffset())
3451  Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
3452  }
3453  }
3454 
3455  // We may have split loads where some of their stores are split stores. For
3456  // such loads and stores, we can only pre-split them if their splits exactly
3457  // match relative to their starting offset. We have to verify this prior to
3458  // any rewriting.
3459  Stores.erase(
3460  remove_if(Stores,
3461  [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
3462  // Lookup the load we are storing in our map of split
3463  // offsets.
3464  auto *LI = cast<LoadInst>(SI->getValueOperand());
3465  // If it was completely unsplittable, then we're done,
3466  // and this store can't be pre-split.
3467  if (UnsplittableLoads.count(LI))
3468  return true;
3469 
3470  auto LoadOffsetsI = SplitOffsetsMap.find(LI);
3471  if (LoadOffsetsI == SplitOffsetsMap.end())
3472  return false; // Unrelated loads are definitely safe.
3473  auto &LoadOffsets = LoadOffsetsI->second;
3474 
3475  // Now lookup the store's offsets.
3476  auto &StoreOffsets = SplitOffsetsMap[SI];
3477 
3478  // If the relative offsets of each split in the load and
3479  // store match exactly, then we can split them and we
3480  // don't need to remove them here.
3481  if (LoadOffsets.Splits == StoreOffsets.Splits)
3482  return false;
3483 
3484  DEBUG(dbgs() << " Mismatched splits for load and store:\n"
3485  << " " << *LI << "\n"
3486  << " " << *SI << "\n");
3487 
3488  // We've found a store and load that we need to split
3489  // with mismatched relative splits. Just give up on them
3490  // and remove both instructions from our list of
3491  // candidates.
3492  UnsplittableLoads.insert(LI);
3493  return true;
3494  }),
3495  Stores.end());
3496  // Now we have to go *back* through all the stores, because a later store may
3497  // have caused an earlier store's load to become unsplittable and if it is
3498  // unsplittable for the later store, then we can't rely on it being split in
3499  // the earlier store either.
3500  Stores.erase(remove_if(Stores,
3501  [&UnsplittableLoads](StoreInst *SI) {
3502  auto *LI = cast<LoadInst>(SI->getValueOperand());
3503  return UnsplittableLoads.count(LI);
3504  }),
3505  Stores.end());
3506  // Once we've established all the loads that can't be split for some reason,
3507  // filter out any that made it into our list.
3508  Loads.erase(remove_if(Loads,
3509  [&UnsplittableLoads](LoadInst *LI) {
3510  return UnsplittableLoads.count(LI);
3511  }),
3512  Loads.end());
3513 
3514  // If no loads or stores are left, there is no pre-splitting to be done for
3515  // this alloca.
3516  if (Loads.empty() && Stores.empty())
3517  return false;
3518 
3519  // From here on, we can't fail and will be building new accesses, so rig up
3520  // an IR builder.
3521  IRBuilderTy IRB(&AI);
3522 
3523  // Collect the new slices which we will merge into the alloca slices.
3524  SmallVector<Slice, 4> NewSlices;
3525 
3526  // Track any allocas we end up splitting loads and stores for so we iterate
3527  // on them.
3528  SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
3529 
3530  // At this point, we have collected all of the loads and stores we can
3531  // pre-split, and the specific splits needed for them. We actually do the
3532  // splitting in a specific order in order to handle when one of the loads in
3533  // splitting in a specific order to handle the case when one of the loads is
3534  // the value operand of one of the stores.
3535  // First, we rewrite all of the split loads, and just accumulate each split
3536  // load in a parallel structure. We also build the slices for them and append
3537  // them to the alloca slices.
3538  SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
3539  std::vector<LoadInst *> SplitLoads;
3540  const DataLayout &DL = AI.getModule()->getDataLayout();
3541  for (LoadInst *LI : Loads) {
3542  SplitLoads.clear();
3543 
3544  IntegerType *Ty = cast<IntegerType>(LI->getType());
3545  uint64_t LoadSize = Ty->getBitWidth() / 8;
3546  assert(LoadSize > 0 && "Cannot have a zero-sized integer load!");
3547 
3548  auto &Offsets = SplitOffsetsMap[LI];
3549  assert(LoadSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
3550  "Slice size should always match load size exactly!");
3551  uint64_t BaseOffset = Offsets.S->beginOffset();
3552  assert(BaseOffset + LoadSize > BaseOffset &&
3553  "Cannot represent alloca access size using 64-bit integers!");
3554 
3555  Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
3556  IRB.SetInsertPoint(LI);
3557 
3558  DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
3559 
3560  uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
3561  int Idx = 0, Size = Offsets.Splits.size();
3562  for (;;) {
3563  auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
3564  auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
3565  LoadInst *PLoad = IRB.CreateAlignedLoad(
3566  getAdjustedPtr(IRB, DL, BasePtr,
3567  APInt(DL.getPointerSizeInBits(), PartOffset),
3568  PartPtrTy, BasePtr->getName() + "."),
3569  getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
3570  LI->getName());
3571  PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
3572 
3573  // Append this load onto the list of split loads so we can find it later
3574  // to rewrite the stores.
3575  SplitLoads.push_back(PLoad);
3576 
3577  // Now build a new slice for the alloca.
3578  NewSlices.push_back(
3579  Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
3580  &PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
3581  /*IsSplittable*/ false));
3582  DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
3583  << ", " << NewSlices.back().endOffset() << "): " << *PLoad
3584  << "\n");
3585 
3586  // See if we've handled all the splits.
3587  if (Idx >= Size)
3588  break;
3589 
3590  // Setup the next partition.
3591  PartOffset = Offsets.Splits[Idx];
3592  ++Idx;
3593  PartSize = (Idx < Size ? Offsets.Splits[Idx] : LoadSize) - PartOffset;
3594  }
3595 
3596  // Now that we have the split loads, do the slow walk over all uses of the
3597  // load and rewrite them as split stores, or save the split loads to use
3598  // below if the store is going to be split there anyway.
3599  bool DeferredStores = false;
3600  for (User *LU : LI->users()) {
3601  StoreInst *SI = cast<StoreInst>(LU);
3602  if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
3603  DeferredStores = true;
3604  DEBUG(dbgs() << " Deferred splitting of store: " << *SI << "\n");
3605  continue;
3606  }
3607 
3608  Value *StoreBasePtr = SI->getPointerOperand();
3609  IRB.SetInsertPoint(SI);
3610 
3611  DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
3612 
3613  for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
3614  LoadInst *PLoad = SplitLoads[Idx];
3615  uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
3616  auto *PartPtrTy =
3617  PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
3618 
3619  StoreInst *PStore = IRB.CreateAlignedStore(
3620  PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
3621  APInt(DL.getPointerSizeInBits(), PartOffset),
3622  PartPtrTy, StoreBasePtr->getName() + "."),
3623  getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
3624  PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
3625  DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
3626  }
3627 
3628  // We want to immediately iterate on any allocas impacted by splitting
3629  // this store, and we have to track any promotable alloca (indicated by
3630  // a direct store) as needing to be resplit because it is no longer
3631  // promotable.
3632  if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
3633  ResplitPromotableAllocas.insert(OtherAI);
3634  Worklist.insert(OtherAI);
3635  } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
3636  StoreBasePtr->stripInBoundsOffsets())) {
3637  Worklist.insert(OtherAI);
3638  }
3639 
3640  // Mark the original store as dead.
3641  DeadInsts.insert(SI);
3642  }
3643 
3644  // Save the split loads if there are deferred stores among the users.
3645  if (DeferredStores)
3646  SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
3647 
3648  // Mark the original load as dead and kill the original slice.
3649  DeadInsts.insert(LI);
3650  Offsets.S->kill();
3651  }
3652 
3653  // Second, we rewrite all of the split stores. At this point, we know that
3654  // all loads from this alloca have been split already. For stores of such
3655  // loads, we can simply look up the pre-existing split loads. For stores of
3656  // other loads, we split those loads first and then write split stores of
3657  // them.
3658  for (StoreInst *SI : Stores) {
3659  auto *LI = cast<LoadInst>(SI->getValueOperand());
3660  IntegerType *Ty = cast<IntegerType>(LI->getType());
3661  uint64_t StoreSize = Ty->getBitWidth() / 8;
3662  assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
3663 
3664  auto &Offsets = SplitOffsetsMap[SI];
3665  assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
3666  "Slice size should always match load size exactly!");
3667  uint64_t BaseOffset = Offsets.S->beginOffset();
3668  assert(BaseOffset + StoreSize > BaseOffset &&
3669  "Cannot represent alloca access size using 64-bit integers!");
3670 
3671  Value *LoadBasePtr = LI->getPointerOperand();
3672  Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
3673 
3674  DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
3675 
3676  // Check whether we have an already split load.
3677  auto SplitLoadsMapI = SplitLoadsMap.find(LI);
3678  std::vector<LoadInst *> *SplitLoads = nullptr;
3679  if (SplitLoadsMapI != SplitLoadsMap.end()) {
3680  SplitLoads = &SplitLoadsMapI->second;
3681  assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
3682  "Too few split loads for the number of splits in the store!");
3683  } else {
3684  DEBUG(dbgs() << " of load: " << *LI << "\n");
3685  }
3686 
3687  uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
3688  int Idx = 0, Size = Offsets.Splits.size();
3689  for (;;) {
3690  auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
3691  auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
3692 
3693  // Either lookup a split load or create one.
3694  LoadInst *PLoad;
3695  if (SplitLoads) {
3696  PLoad = (*SplitLoads)[Idx];
3697  } else {
3698  IRB.SetInsertPoint(LI);
3699  PLoad = IRB.CreateAlignedLoad(
3700  getAdjustedPtr(IRB, DL, LoadBasePtr,
3701  APInt(DL.getPointerSizeInBits(), PartOffset),
3702  PartPtrTy, LoadBasePtr->getName() + "."),
3703  getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
3704  LI->getName());
3705  }
3706 
3707  // And store this partition.
3708  IRB.SetInsertPoint(SI);
3709  StoreInst *PStore = IRB.CreateAlignedStore(
3710  PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
3711  APInt(DL.getPointerSizeInBits(), PartOffset),
3712  PartPtrTy, StoreBasePtr->getName() + "."),
3713  getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
3714 
3715  // Now build a new slice for the alloca.
3716  NewSlices.push_back(
3717  Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
3718  &PStore->getOperandUse(PStore->getPointerOperandIndex()),
3719  /*IsSplittable*/ false));
3720  DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
3721  << ", " << NewSlices.back().endOffset() << "): " << *PStore
3722  << "\n");
3723  if (!SplitLoads) {
3724  DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
3725  }
3726 
3727  // See if we've finished all the splits.
3728  if (Idx >= Size)
3729  break;
3730 
3731  // Setup the next partition.
3732  PartOffset = Offsets.Splits[Idx];
3733  ++Idx;
3734  PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
3735  }
3736 
3737  // We want to immediately iterate on any allocas impacted by splitting
3738  // this load, which is only relevant if it isn't a load of this alloca and
3739  // thus we didn't already split the loads above. We also have to keep track
3740  // of any promotable allocas we split loads on as they can no longer be
3741  // promoted.
3742  if (!SplitLoads) {
3743  if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
3744  assert(OtherAI != &AI && "We can't re-split our own alloca!");
3745  ResplitPromotableAllocas.insert(OtherAI);
3746  Worklist.insert(OtherAI);
3747  } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
3748  LoadBasePtr->stripInBoundsOffsets())) {
3749  assert(OtherAI != &AI && "We can't re-split our own alloca!");
3750  Worklist.insert(OtherAI);
3751  }
3752  }
3753 
3754  // Mark the original store as dead now that we've split it up and kill its
3755  // slice. Note that we leave the original load in place unless this store
3756  // was its only use. It may in turn be split up if it is an alloca load
3757  // for some other alloca, but it may be a normal load. This may introduce
3758  // redundant loads, but where those can be merged the rest of the optimizer
3759  // should handle the merging, and this uncovers SSA splits which is more
3760  // important. In practice, the original loads will almost always be fully
3761  // split and removed eventually, and the splits will be merged by any
3762  // trivial CSE, including instcombine.
3763  if (LI->hasOneUse()) {
3764  assert(*LI->user_begin() == SI && "Single use isn't this store!");
3765  DeadInsts.insert(LI);
3766  }
3767  DeadInsts.insert(SI);
3768  Offsets.S->kill();
3769  }
3770 
3771  // Remove the killed slices that have been pre-split.
3772  AS.erase(remove_if(AS, [](const Slice &S) { return S.isDead(); }), AS.end());
3773 
3774  // Insert our new slices. This will sort and merge them into the sorted
3775  // sequence.
3776  AS.insert(NewSlices);
3777 
3778  DEBUG(dbgs() << " Pre-split slices:\n");
3779 #ifndef NDEBUG
3780  for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
3781  DEBUG(AS.print(dbgs(), I, " "));
3782 #endif
3783 
3784  // Finally, don't try to promote any allocas that now require re-splitting.
3785  // They have already been added to the worklist above.
3786  PromotableAllocas.erase(
3787  remove_if(
3788  PromotableAllocas,
3789  [&](AllocaInst *AI) { return ResplitPromotableAllocas.count(AI); }),
3790  PromotableAllocas.end());
3791 
3792  return true;
3793 }
3794 
3795 /// \brief Rewrite an alloca partition's users.
3796 ///
3797 /// This routine drives both of the rewriting goals of the SROA pass. It tries
3798 /// to rewrite uses of an alloca partition to be conducive for SSA value
3799 /// promotion. If the partition needs a new, more refined alloca, this will
3800 /// build that new alloca, preserving as much type information as possible, and
3801 /// rewrite the uses of the old alloca to point at the new one and have the
3802 /// appropriate new offsets. It also evaluates how successful the rewrite was
3803 /// at enabling promotion and if it was successful queues the alloca to be
3804 /// promoted.
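///
/// As an illustrative sketch of the type selection below (not an exhaustive
/// description): a 4-byte partition whose uses agree on float gets a new
/// "float" alloca; an 8-byte partition with mixed-type uses falls back to i64
/// on targets where i64 is a legal integer; and a 12-byte partition with no
/// better candidate falls back to a [12 x i8] array.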
3805 AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
3806  Partition &P) {
3807  // Try to compute a friendly type for this partition of the alloca. This
3808  // won't always succeed, in which case we fall back to a legal integer type
3809  // or an i8 array of an appropriate size.
3810  Type *SliceTy = nullptr;
3811  const DataLayout &DL = AI.getModule()->getDataLayout();
3812  if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
3813  if (DL.getTypeAllocSize(CommonUseTy) >= P.size())
3814  SliceTy = CommonUseTy;
3815  if (!SliceTy)
3816  if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
3817  P.beginOffset(), P.size()))
3818  SliceTy = TypePartitionTy;
3819  if ((!SliceTy || (SliceTy->isArrayTy() &&
3820  SliceTy->getArrayElementType()->isIntegerTy())) &&
3821  DL.isLegalInteger(P.size() * 8))
3822  SliceTy = Type::getIntNTy(*C, P.size() * 8);
3823  if (!SliceTy)
3824  SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
3825  assert(DL.getTypeAllocSize(SliceTy) >= P.size());
3826 
3827  bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
3828 
3829  VectorType *VecTy =
3830  IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
3831  if (VecTy)
3832  SliceTy = VecTy;
3833 
3834  // Check for the case where we're going to rewrite to a new alloca of the
3835  // exact same type as the original, and with the same access offsets. In that
3836  // case, re-use the existing alloca, but still run through the rewriter to
3837  // perform phi and select speculation.
3838  AllocaInst *NewAI;
3839  if (SliceTy == AI.getAllocatedType()) {
3840  assert(P.beginOffset() == 0 &&
3841  "Non-zero begin offset but same alloca type");
3842  NewAI = &AI;
3843  // FIXME: We should be able to bail at this point with "nothing changed".
3844  // FIXME: We might want to defer PHI speculation until after here.
3845  // FIXME: return nullptr;
3846  } else {
3847  unsigned Alignment = AI.getAlignment();
3848  if (!Alignment) {
3849  // The minimum alignment which users can rely on when the explicit
3850  // alignment is omitted or zero is that required by the ABI for this
3851  // type.
3852  Alignment = DL.getABITypeAlignment(AI.getAllocatedType());
3853  }
3854  Alignment = MinAlign(Alignment, P.beginOffset());
3855  // If we will get at least this much alignment from the type alone, leave
3856  // the alloca's alignment unconstrained.
3857  if (Alignment <= DL.getABITypeAlignment(SliceTy))
3858  Alignment = 0;
3859  NewAI = new AllocaInst(
3860  SliceTy, nullptr, Alignment,
3861  AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
3862  ++NumNewAllocas;
3863  }
3864 
3865  DEBUG(dbgs() << "Rewriting alloca partition "
3866  << "[" << P.beginOffset() << "," << P.endOffset()
3867  << ") to: " << *NewAI << "\n");
3868 
3869  // Track the high watermark on the worklist as it is only relevant for
3870  // promoted allocas. We will reset it to this point if the alloca is not in
3871  // fact scheduled for promotion.
3872  unsigned PPWOldSize = PostPromotionWorklist.size();
3873  unsigned NumUses = 0;
3874  SmallPtrSet<PHINode *, 8> PHIUsers;
3875  SmallPtrSet<SelectInst *, 8> SelectUsers;
3876 
3877  AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
3878  P.endOffset(), IsIntegerPromotable, VecTy,
3879  PHIUsers, SelectUsers);
3880  bool Promotable = true;
3881  for (Slice *S : P.splitSliceTails()) {
3882  Promotable &= Rewriter.visit(S);
3883  ++NumUses;
3884  }
3885  for (Slice &S : P) {
3886  Promotable &= Rewriter.visit(&S);
3887  ++NumUses;
3888  }
3889 
3890  NumAllocaPartitionUses += NumUses;
3891  MaxUsesPerAllocaPartition =
3892  std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
3893 
3894  // Now that we've processed all the slices in the new partition, check if any
3895  // PHIs or Selects would block promotion.
3896  for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
3897  E = PHIUsers.end();
3898  I != E; ++I)
3899  if (!isSafePHIToSpeculate(**I)) {
3900  Promotable = false;
3901  PHIUsers.clear();
3902  SelectUsers.clear();
3903  break;
3904  }
3905  for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
3906  E = SelectUsers.end();
3907  I != E; ++I)
3908  if (!isSafeSelectToSpeculate(**I)) {
3909  Promotable = false;
3910  PHIUsers.clear();
3911  SelectUsers.clear();
3912  break;
3913  }
3914 
3915  if (Promotable) {
3916  if (PHIUsers.empty() && SelectUsers.empty()) {
3917  // Promote the alloca.
3918  PromotableAllocas.push_back(NewAI);
3919  } else {
3920  // If we have either PHIs or Selects to speculate, add them to those
3921  // worklists and re-queue the new alloca so that we promote it on the
3922  // next iteration.
3923  for (PHINode *PHIUser : PHIUsers)
3924  SpeculatablePHIs.insert(PHIUser);
3925  for (SelectInst *SelectUser : SelectUsers)
3926  SpeculatableSelects.insert(SelectUser);
3927  Worklist.insert(NewAI);
3928  }
3929  } else {
3930  // Drop any post-promotion work items if promotion didn't happen.
3931  while (PostPromotionWorklist.size() > PPWOldSize)
3932  PostPromotionWorklist.pop_back();
3933 
3934  // We couldn't promote and we didn't create a new partition, nothing
3935  // happened.
3936  if (NewAI == &AI)
3937  return nullptr;
3938 
3939  // If we can't promote the alloca, iterate on it to check for new
3940  // refinements exposed by splitting the current alloca. Don't iterate on an
3941  // alloca which didn't actually change and didn't get promoted.
3942  Worklist.insert(NewAI);
3943  }
3944 
3945  return NewAI;
3946 }
3947 
3948 /// \brief Walks the slices of an alloca and forms partitions based on them,
3949 /// rewriting each of their uses.
3950 bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
3951  if (AS.begin() == AS.end())
3952  return false;
3953 
3954  unsigned NumPartitions = 0;
3955  bool Changed = false;
3956  const DataLayout &DL = AI.getModule()->getDataLayout();
3957 
3958  // First try to pre-split loads and stores.
3959  Changed |= presplitLoadsAndStores(AI, AS);
3960 
3961  // Now that we have identified any pre-splitting opportunities, mark any
3962  // splittable (non-whole-alloca) loads and stores as unsplittable. If we fail
3963  // to split these during pre-splitting, we want to force them to be
3964  // rewritten into a partition.
3965  bool IsSorted = true;
3966  for (Slice &S : AS) {
3967  if (!S.isSplittable())
3968  continue;
3969  // FIXME: We currently leave whole-alloca splittable loads and stores. These
3970  // used to be the only splittable loads and stores, and we need to be
3971  // confident that the above handling of splittable loads and stores is
3972  // completely sufficient before we forcibly disable the remaining handling.
3973  if (S.beginOffset() == 0 &&
3974  S.endOffset() >= DL.getTypeAllocSize(AI.getAllocatedType()))
3975  continue;
3976  if (isa<LoadInst>(S.getUse()->getUser()) ||
3977  isa<StoreInst>(S.getUse()->getUser())) {
3978  S.makeUnsplittable();
3979  IsSorted = false;
3980  }
3981  }
3982  if (!IsSorted)
3983  std::sort(AS.begin(), AS.end());
3984 
3985  /// Describes the allocas introduced by rewritePartition in order to migrate
3986  /// the debug info.
3987  struct Fragment {
3988  AllocaInst *Alloca;
3989  uint64_t Offset;
3990  uint64_t Size;
3991  Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
3992  : Alloca(AI), Offset(O), Size(S) {}
3993  };
3994  SmallVector<Fragment, 4> Fragments;
3995 
3996  // Rewrite each partition.
3997  for (auto &P : AS.partitions()) {
3998  if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
3999  Changed = true;
4000  if (NewAI != &AI) {
4001  uint64_t SizeOfByte = 8;
4002  uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType());
4003  // Don't include any padding.
4004  uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
4005  Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
4006  }
4007  }
4008  ++NumPartitions;
4009  }
4010 
4011  NumAllocaPartitions += NumPartitions;
4012  MaxPartitionsPerAlloca =
4013  std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
4014 
4015  // Migrate debug information from the old alloca to the new alloca(s)
4016  // and the individual partitions.
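  // As a hypothetical example: a 16-byte alloca described by one dbg.declare
  // that is split into two 8-byte partitions ends up with two dbg.declare
  // intrinsics, one per new alloca, whose expressions carry fragment info of
  // (offset 0, size 64) and (offset 64, size 64) bits respectively.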
4017  if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) {
4018  auto *Var = DbgDecl->getVariable();
4019  auto *Expr = DbgDecl->getExpression();
4020  DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
4021  uint64_t AllocaSize = DL.getTypeSizeInBits(AI.getAllocatedType());
4022  for (auto Fragment : Fragments) {
4023  // Create a fragment expression describing the new partition or reuse AI's
4024  // expression if there is only one partition.
4025  auto *FragmentExpr = Expr;
4026  if (Fragment.Size < AllocaSize || Expr->isFragment()) {
4027  // If this alloca is already a scalar replacement of a larger aggregate,
4028  // Fragment.Offset describes the offset inside the scalar.
4029  auto ExprFragment = Expr->getFragmentInfo();
4030  uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0;
4031  uint64_t Start = Offset + Fragment.Offset;
4032  uint64_t Size = Fragment.Size;
4033  if (ExprFragment) {
4034  uint64_t AbsEnd =
4035  ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
4036  if (Start >= AbsEnd)
4037  // No need to describe a SROAed padding.
4038  continue;
4039  Size = std::min(Size, AbsEnd - Start);
4040  }
4041  FragmentExpr = DIB.createFragmentExpression(Start, Size);
4042  }
4043 
4044  // Remove any existing dbg.declare intrinsic describing the same alloca.
4045  if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Fragment.Alloca))
4046  OldDDI->eraseFromParent();
4047 
4048  DIB.insertDeclare(Fragment.Alloca, Var, FragmentExpr,
4049  DbgDecl->getDebugLoc(), &AI);
4050  }
4051  }
4052  return Changed;
4053 }
4054 
4055 /// \brief Clobber a use with undef, deleting the used value if it becomes dead.
4056 void SROA::clobberUse(Use &U) {
4057  Value *OldV = U;
4058  // Replace the use with an undef value.
4059  U = UndefValue::get(OldV->getType());
4060 
4061  // Check for this making an instruction dead. We have to garbage collect
4062  // all the dead instructions to ensure the uses of any alloca end up being
4063  // minimal.
4064  if (Instruction *OldI = dyn_cast<Instruction>(OldV))
4065  if (isInstructionTriviallyDead(OldI)) {
4066  DeadInsts.insert(OldI);
4067  }
4068 }
4069 
4070 /// \brief Analyze an alloca for SROA.
4071 ///
4072 /// This analyzes the alloca to ensure we can reason about it, builds
4073 /// the slices of the alloca, and then hands it off to be split and
4074 /// rewritten as needed.
4075 bool SROA::runOnAlloca(AllocaInst &AI) {
4076  DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
4077  ++NumAllocasAnalyzed;
4078 
4079  // Special case dead allocas, as they're trivial.
4080  if (AI.use_empty()) {
4081  AI.eraseFromParent();
4082  return true;
4083  }
4084  const DataLayout &DL = AI.getModule()->getDataLayout();
4085 
4086  // Skip alloca forms that this analysis can't handle.
4087  if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
4088  DL.getTypeAllocSize(AI.getAllocatedType()) == 0)
4089  return false;
4090 
4091  bool Changed = false;
4092 
4093  // First, split any FCA loads and stores touching this alloca to promote
4094  // better splitting and promotion opportunities.
4095  AggLoadStoreRewriter AggRewriter;
4096  Changed |= AggRewriter.rewrite(AI);
4097 
4098  // Build the slices using a recursive instruction-visiting builder.
4099  AllocaSlices AS(DL, AI);
4100  DEBUG(AS.print(dbgs()));
4101  if (AS.isEscaped())
4102  return Changed;
4103 
4104  // Delete all the dead users of this alloca before splitting and rewriting it.
4105  for (Instruction *DeadUser : AS.getDeadUsers()) {
4106  // Free up everything used by this instruction.
4107  for (Use &DeadOp : DeadUser->operands())
4108  clobberUse(DeadOp);
4109 
4110  // Now replace the uses of this instruction.
4111  DeadUser->replaceAllUsesWith(UndefValue::get(DeadUser->getType()));
4112 
4113  // And mark it for deletion.
4114  DeadInsts.insert(DeadUser);
4115  Changed = true;
4116  }
4117  for (Use *DeadOp : AS.getDeadOperands()) {
4118  clobberUse(*DeadOp);
4119  Changed = true;
4120  }
4121 
4122  // No slices to split. Leave the dead alloca for a later pass to clean up.
4123  if (AS.begin() == AS.end())
4124  return Changed;
4125 
4126  Changed |= splitAlloca(AI, AS);
4127 
4128  DEBUG(dbgs() << " Speculating PHIs\n");
4129  while (!SpeculatablePHIs.empty())
4130  speculatePHINodeLoads(*SpeculatablePHIs.pop_back_val());
4131 
4132  DEBUG(dbgs() << " Speculating Selects\n");
4133  while (!SpeculatableSelects.empty())
4134  speculateSelectInstLoads(*SpeculatableSelects.pop_back_val());
4135 
4136  return Changed;
4137 }
4138 
4139 /// \brief Delete the dead instructions accumulated in this run.
4140 ///
4141 /// Recursively deletes the dead instructions we've accumulated. This is done
4142 /// at the very end to maximize locality of the recursive delete and to
4143 /// minimize the problems of invalidated instruction pointers as such pointers
4144 /// are used heavily in the intermediate stages of the algorithm.
4145 ///
4146 /// We also record the alloca instructions deleted here so that they aren't
4147 /// subsequently handed to mem2reg to promote.
4148 void SROA::deleteDeadInstructions(
4149  SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
4150  while (!DeadInsts.empty()) {
4151  Instruction *I = DeadInsts.pop_back_val();
4152  DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
4153 
4154  I->replaceAllUsesWith(UndefValue::get(I->getType()));
4155 
4156  for (Use &Operand : I->operands())
4157  if (Instruction *U = dyn_cast<Instruction>(Operand)) {
4158  // Zero out the operand and see if it becomes trivially dead.
4159  Operand = nullptr;
4160  if (isInstructionTriviallyDead(U))
4161  DeadInsts.insert(U);
4162  }
4163 
4164  if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
4165  DeletedAllocas.insert(AI);
4166  if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(AI))
4167  DbgDecl->eraseFromParent();
4168  }
4169 
4170  ++NumDeleted;
4171  I->eraseFromParent();
4172  }
4173 }
4174 
4175 /// \brief Promote the allocas, using the best available technique.
4176 ///
4177 /// This attempts to promote whatever allocas have been identified as viable in
4178 /// the PromotableAllocas list. If that list is empty, there is nothing to do.
4179 /// This function returns whether any promotion occurred.
4180 bool SROA::promoteAllocas(Function &F) {
4181  if (PromotableAllocas.empty())
4182  return false;
4183 
4184  NumPromoted += PromotableAllocas.size();
4185 
4186  DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
4187  PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
4188  PromotableAllocas.clear();
4189  return true;
4190 }
4191 
4192 PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
4193  AssumptionCache &RunAC) {
4194  DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
4195  C = &F.getContext();
4196  DT = &RunDT;
4197  AC = &RunAC;
4198 
4199  BasicBlock &EntryBB = F.getEntryBlock();
4200  for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
4201  I != E; ++I) {
4202  if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
4203  Worklist.insert(AI);
4204  }
4205 
4206  bool Changed = false;
4207  // A set of deleted alloca instruction pointers which should be removed from
4208  // the list of promotable allocas.
4209  SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
4210 
4211  do {
4212  while (!Worklist.empty()) {
4213  Changed |= runOnAlloca(*Worklist.pop_back_val());
4214  deleteDeadInstructions(DeletedAllocas);
4215 
4216  // Remove the deleted allocas from various lists so that we don't try to
4217  // continue processing them.
4218  if (!DeletedAllocas.empty()) {
4219  auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
4220  Worklist.remove_if(IsInSet);
4221  PostPromotionWorklist.remove_if(IsInSet);
4222  PromotableAllocas.erase(remove_if(PromotableAllocas, IsInSet),
4223  PromotableAllocas.end());
4224  DeletedAllocas.clear();
4225  }
4226  }
4227 
4228  Changed |= promoteAllocas(F);
4229 
4230  Worklist = PostPromotionWorklist;
4231  PostPromotionWorklist.clear();
4232  } while (!Worklist.empty());
4233 
4234  if (!Changed)
4235  return PreservedAnalyses::all();
4236 
4237  // FIXME: Even when promoting allocas we should preserve some abstract set of
4238  // CFG-specific analyses.
4239  PreservedAnalyses PA;
4240  PA.preserve<GlobalsAA>();
4241  return PA;
4242 }
4243 
4244 PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) {
4245  return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F),
4246  AM.getResult<AssumptionAnalysis>(F));
4247 }
4248 
4249 /// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
4250 ///
4251 /// This is in the llvm namespace purely to allow it to be a friend of the \c
4252 /// SROA pass.
4253 class llvm::sroa::SROALegacyPass : public FunctionPass {
4254  /// The SROA implementation.
4255  SROA Impl;
4256 
4257 public:
4258  SROALegacyPass() : FunctionPass(ID) {
4259  initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
4260  }
4261  bool runOnFunction(Function &F) override {
4262  if (skipFunction(F))
4263  return false;
4264 
4265  auto PA = Impl.runImpl(
4266  F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
4267  getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
4268  return !PA.areAllPreserved();
4269  }
4270  void getAnalysisUsage(AnalysisUsage &AU) const override {
4271  AU.addRequired<AssumptionCacheTracker>();
4272  AU.addRequired<DominatorTreeWrapperPass>();
4273  AU.addPreserved<GlobalsAAWrapperPass>();
4274  AU.setPreservesCFG();
4275  }
4276 
4277  StringRef getPassName() const override { return "SROA"; }
4278  static char ID;
4279 };
4280 
4281 char SROALegacyPass::ID = 0;
4282 
4283 FunctionPass *llvm::createSROAPass() { return new SROALegacyPass(); }
4284 
4285 INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
4286  "Scalar Replacement Of Aggregates", false, false)
4287 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
4288 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
4289 INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
4290  false, false)
Definition: SROA.cpp:209
An immutable pass that tracks lazily created AssumptionCache objects.
gep_type_iterator gep_type_end(const User *GEP)
void insert(ArrayRef< Slice > NewSlices)
Insert new slices for this alloca.
Definition: SROA.cpp:228
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:450
A cache of .assume calls within a function.
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:777
const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:233
This class wraps the llvm.memset intrinsic.
Scalar Replacement Of Aggregates
Definition: SROA.cpp:4289
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:736
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:283
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:189
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:471
An instruction for reading from memory.
Definition: Instructions.h:164
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:65
RetTy visitPHINode(PHINode &I)
Definition: InstVisitor.h:184
Hexagon Common GEP
Type * getElementType() const
Definition: DerivedTypes.h:462
void reserve(size_type N)
Definition: SmallVector.h:377
void setDest(Value *Ptr)
Set the specified arguments of the instruction.
bool isSimple() const
Definition: Instructions.h:263
void setAlignment(Constant *A)
bool isSafeToLoadUnconditionally(Value *V, unsigned Align, const DataLayout &DL, Instruction *ScanFrom=nullptr, const DominatorTree *DT=nullptr)
Return true if we know that executing a load from this value cannot trap.
Definition: Loads.cpp:191
op_iterator op_begin()
Definition: User.h:205
bool operator==(const partition_iterator &RHS) const
Definition: SROA.cpp:542
static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset, const DataLayout &DL)
Compute the adjusted alignment for a load or store from an offset.
Definition: SROA.cpp:1585
Builder for the alloca slices.
Definition: SROA.cpp:607
Type * getPointerElementType() const
Definition: Type.h:358
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:191
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:345
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:228
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:239
static Value * getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL, Value *BasePtr, Type *Ty, Type *TargetTy, SmallVectorImpl< Value * > &Indices, Twine NamePrefix)
Get a natural GEP off of the BasePtr walking through Ty toward TargetTy without changing the offset o...
Definition: SROA.cpp:1332
element_iterator element_end() const
Definition: DerivedTypes.h:280
static Value * buildGEP(IRBuilderTy &IRB, Value *BasePtr, SmallVectorImpl< Value * > &Indices, Twine NamePrefix)
Build a GEP out of a base pointer and indices.
Definition: SROA.cpp:1309
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1...
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, const DataLayout &DL)
Test whether the given slice use can be promoted to a vector.
Definition: SROA.cpp:1705
AnalysisUsage & addRequired()
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:496
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
bool isPacked() const
Definition: DerivedTypes.h:245
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
This class represents the LLVM 'select' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
Type::subtype_iterator element_iterator
Definition: DerivedTypes.h:278
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
Definition: DataLayout.cpp:566
RetTy visitIntrinsicInst(IntrinsicInst &I)
Definition: InstVisitor.h:222
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:143
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:440
Class to represent struct types.
Definition: DerivedTypes.h:199
Type * getArrayElementType() const
Definition: Type.h:347
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
bool empty() const
Test whether this partition contains no slices, and merely spans a region occupied by split slices...
Definition: SROA.cpp:359
bool isEscaped() const
Test whether a pointer to the allocation escapes our analysis.
Definition: SROA.cpp:205
static Type * getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size)
Try to find a partition of the aggregate type passed in for a given offset and size.
Definition: SROA.cpp:3213
static void advance(T &it, size_t Val)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:588
static Value * convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *NewTy)
Generic routine to convert an SSA value to a value of a different type.
Definition: SROA.cpp:1653
const_iterator begin() const
Definition: SROA.cpp:216
element_iterator element_begin() const
Definition: DerivedTypes.h:279
A partition of the slices.
Definition: SROA.cpp:317
ArrayRef< Slice * > splitSliceTails() const
Get the sequence of split slice tails.
Definition: SROA.cpp:379
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: SROA.cpp:4270
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass...
Definition: SROA.cpp:4261
This file provides a collection of visitors which walk the (instruction) uses of a pointer...
SynchronizationScope getSynchScope() const
Definition: Instructions.h:245
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:399
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:90
void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AliasSetTracker *AST=nullptr, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
static Constant * getZExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1587
void setAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope=CrossThread)
Definition: Instructions.h:257
bool visit(AllocaSlices::const_iterator I)
Definition: SROA.cpp:2226
A base class for visitors over the uses of a pointer value.
#define F(x, y, z)
Definition: MD5.cpp:51
ArrayRef< Use * > getDeadOperands() const
Access the dead operands referring to this alloca.
Definition: SROA.cpp:250
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
A legacy pass for the legacy pass manager that wraps the SROA pass.
Definition: SROA.cpp:4253
Class to represent array types.
Definition: DerivedTypes.h:345
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:256
This class represents a no-op cast from one type to another.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
ConstantFolder - Create constants with minimum, target independent, folding.
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
An instruction for storing to memory.
Definition: Instructions.h:300
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(std::begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:791
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:401
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:210
static Constant * getUDiv(Constant *C1, Constant *C2, bool isExact=false)
Definition: Constants.cpp:2165
RetTy visitMemTransferInst(MemTransferInst &I)
Definition: InstVisitor.h:217
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:65
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:96
Type * getScalarType() const LLVM_READONLY
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.cpp:44
Type * getElementType() const
Definition: DerivedTypes.h:336
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy)
Test whether we can convert a value from the old to the new type.
Definition: SROA.cpp:1611
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Class to represent pointers.
Definition: DerivedTypes.h:443
uint64_t endOffset() const
The end offset of this partition.
Definition: SROA.cpp:347
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
unsigned getNumIncomingValues() const
Return the number of incoming edges.
void printSlice(raw_ostream &OS, const_iterator I, StringRef Indent=" ") const
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:517
AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass, AllocaInst &OldAI, AllocaInst &NewAI, uint64_t NewAllocaBeginOffset, uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, VectorType *PromotableVecTy, SmallPtrSetImpl< PHINode * > &PHIUsers, SmallPtrSetImpl< SelectInst * > &SelectUsers)
Definition: SROA.cpp:2196
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:254
unsigned getNumSuccessors() const
Return the number of successors that this terminator has.
Definition: InstrTypes.h:74
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
Definition: MathExtras.h:589
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:830
Scalar Replacement Of false
Definition: SROA.cpp:4289
iterator_range< partition_iterator > partitions()
SmallVectorImpl< Slice >::const_iterator const_iterator
Definition: SROA.cpp:214
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:348
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:52
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:107
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
uint64_t beginOffset() const
The start offset of this partition.
Definition: SROA.cpp:342
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:295
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs...ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:653
void setAAMetadata(const AAMDNodes &N)
Sets the metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1222
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
static unsigned getPointerOperandIndex()
Definition: Instructions.h:396
uint64_t getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Definition: DataLayout.h:399
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:219
Type * getElementType(unsigned N) const
Definition: DerivedTypes.h:290
This is an important base class in LLVM.
Definition: Constant.h:42
const Value * getCondition() const
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool isSafeSelectToSpeculate(SelectInst &SI)
Select instructions that use an alloca and are subsequently loaded can be rewritten to load both inpu...
Definition: SROA.cpp:1244
unsigned getAlignment() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:109
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:587
Value * getRawDest() const
Value * stripInBoundsOffsets()
Strip off pointer casts and inbounds GEPs.
Definition: Value.cpp:544
static sys::TimePoint< std::chrono::seconds > now(bool Deterministic)
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:207
static cl::opt< bool > SROAStrictInbounds("sroa-strict-inbounds", cl::init(false), cl::Hidden)
Hidden option to experiment with completely strict handling of inbounds GEPs.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:743
uint32_t Offset
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:150
Analysis pass providing a never-invalidated alias analysis result.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1255
iterator begin() const
Definition: ArrayRef.h:129
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1119
void initializeSROALegacyPassPass(PassRegistry &)
uint64_t getNumElements() const
Definition: DerivedTypes.h:335
void print(raw_ostream &OS, const_iterator I, StringRef Indent=" ") const
sroa
Definition: SROA.cpp:4289
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1854
Value * getOperand(unsigned i) const
Definition: User.h:145
op_range operands()
Definition: User.h:213
Value * getPointerOperand()
Definition: Instructions.h:270
iterator begin() const
Definition: SmallPtrSet.h:398
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:96
Class to represent integer types.
Definition: DerivedTypes.h:39
RetTy visitLoadInst(LoadInst &I)
Definition: InstVisitor.h:178
void setAlignment(unsigned Align)
static Constant * getAllOnesValue(Type *Ty)
Get the all ones value.
Definition: Constants.cpp:249
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:98
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:392
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:213
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1337
iterator erase(const_iterator CI)
Definition: SmallVector.h:431
LLVM_NODISCARD size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Definition: StringRef.cpp:264
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:654
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:213
const Value * getTrueValue() const
iterator end() const
Definition: SROA.cpp:371
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_NODISCARD size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
Definition: StringRef.h:357
static Type * stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty)
Strip aggregate type wrapping.
Definition: SROA.cpp:3175
bool ugt(const APInt &RHS) const
Unsigned greather than comparison.
Definition: APInt.h:1083
SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet.
Definition: SmallPtrSet.h:275
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenev...
Definition: IRBuilder.h:62
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:709
This is the superclass of the array and vector type classes.
Definition: DerivedTypes.h:319
A function analysis which provides an AssumptionCache.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:689
isPodLike - This is a type trait that is used to determine whether a given type can be copied around ...
Definition: ArrayRef.h:507
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
AtomicOrdering getOrdering() const
Returns the ordering effect of this store.
Definition: Instructions.h:355
This is the shared class of boolean and integer constants.
Definition: Constants.h:88
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:408
iterator end()
Definition: BasicBlock.h:230
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:58
AllocaSlices(const DataLayout &DL, AllocaInst &AI)
Construct the slices of a particular alloca.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:59
partition_iterator & operator++()
Definition: SROA.cpp:562
static Value * insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name)
Definition: SROA.cpp:2089
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:218
uint64_t size() const
The size of the partition.
Definition: SROA.cpp:352
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:625
Value * getLength() const
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
uint64_t getSizeInBytes() const
Definition: DataLayout.h:503
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:173
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
Definition: SROA.cpp:622
unsigned getElementContainingOffset(uint64_t Offset) const
Given a valid byte offset into the structure, returns the structure index that contains it...
Definition: DataLayout.cpp:79
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:558
AtomicOrdering getOrdering() const
Returns the ordering effect of this fence.
Definition: Instructions.h:234
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:276
const BasicBlock & getEntryBlock() const
Definition: Function.h:519
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:275
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
void setOperand(unsigned i, Value *Val)
Definition: User.h:150
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:122
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
A range adaptor for a pair of iterators.
Class to represent vector types.
Definition: DerivedTypes.h:369
Class for arbitrary precision integers.
Definition: APInt.h:77
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:195
static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t AllocBeginOffset, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp)
Test whether a slice of an alloca is valid for integer widening.
Definition: SROA.cpp:1887
iterator_range< user_iterator > users()
Definition: Value.h:370
RetTy visitStoreInst(StoreInst &I)
Definition: InstVisitor.h:179
#define NDEBUG
Definition: regutils.h:48
static Value * insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name)
Definition: SROA.cpp:2032
void setLength(Value *L)
static Type * findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset)
Walk the range of a partitioning looking for a common type to cover this sequence of slices...
Definition: SROA.cpp:1057
static Value * foldPHINodeOrSelectInst(Instruction &I)
A helper that folds a PHI node or a select.
Definition: SROA.cpp:595
Virtual Register Rewriter
Definition: VirtRegMap.cpp:194
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1724
iterator end() const
Definition: SmallPtrSet.h:405
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:528
This class wraps the llvm.memcpy/memmove intrinsics.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:384
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
static const size_t npos
Definition: StringRef.h:51
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
Definition: SROA.cpp:4277
Visitor to rewrite instructions using p particular slice of an alloca to use a new alloca...
Definition: SROA.cpp:2142
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:227
void * PointerTy
Definition: GenericValue.h:24
void getAAMetadata(AAMDNodes &N, bool Merge=false) const
Fills the AAMDNodes structure with AA metadata from this instruction.
static void speculateSelectInstLoads(SelectInst &SI)
Definition: SROA.cpp:1266
static bool runImpl(CallGraphSCC &SCC, CallGraph &CG, function_ref< AAResults &(Function &F)> AARGetter, unsigned MaxElements)
#define I(x, y, z)
Definition: MD5.cpp:54
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:124
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:383
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:349
FunctionPass * createSROAPass()
Definition: SROA.cpp:4283
void setSource(Value *Ptr)
iterator find(const KeyT &Val)
Definition: DenseMap.h:127
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:120
static Value * extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name)
Definition: SROA.cpp:2063
const_iterator end() const
Definition: SROA.cpp:217
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:391
static Value * getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, Type *Ty, APInt &Offset, Type *TargetTy, SmallVectorImpl< Value * > &Indices, Twine NamePrefix)
Recursively compute indices for a natural GEP.
Definition: SROA.cpp:1376
iterator_range< iterator > range
Definition: SROA.cpp:210
void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, BasicBlock::iterator InsertPt) const
Definition: IRBuilder.h:64
Value * getRawSource() const
Return the arguments to the instruction.
static unsigned getPointerOperandIndex()
Definition: Instructions.h:272
bool use_empty() const
Definition: Value.h:299
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:346
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:326
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:288
LLVM Value Representation.
Definition: Value.h:71
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1007
static bool isSafePHIToSpeculate(PHINode &PN)
PHI instructions that use an alloca and are subsequently loaded can be rewritten to load both input p...
Definition: SROA.cpp:1124
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:631
APInt shl(const APInt &LHS, unsigned shiftAmt)
Left-shift function.
Definition: APInt.h:1899
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
iterator_range< const_iterator > const_range
Definition: SROA.cpp:215
bool isTriviallyEmpty() const
Check if this twine is trivially empty; a false return value does not necessarily mean the twine is e...
Definition: Twine.h:408
static void speculatePHINodeLoads(PHINode &PN)
Definition: SROA.cpp:1188
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:533
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:100
An optimization pass providing Scalar Replacement of Aggregates.
Definition: SROA.h:54
static cl::opt< bool > SROARandomShuffleSlices("sroa-random-shuffle-slices", cl::init(false), cl::Hidden)
Hidden option to enable randomly shuffling the slices to help uncover instability in their order...
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:678
IRTranslator LLVM IR MI
const Value * getFalseValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:217
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL)
Test whether the given alloca partition's integer operations can be widened to promotable ones...
Definition: SROA.cpp:1967
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1722
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU...
Definition: DataLayout.h:242
int * Ptr
void visitInstruction(Instruction &I)
Definition: InstVisitor.h:262
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:102
const Use & getRawDestUse() const
bool isBigEndian() const
Definition: DataLayout.h:221
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:479
This represents the llvm.dbg.declare instruction.
Definition: IntrinsicInst.h:89
Value * getPointerOperand()
Definition: Instructions.h:394
const BasicBlock * getParent() const
Definition: Instruction.h:62
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
INITIALIZE_PASS_BEGIN(SROALegacyPass,"sroa","Scalar Replacement Of Aggregates", false, false) INITIALIZE_PASS_END(SROALegacyPass
an instruction to allocate memory on the stack
Definition: Instructions.h:60
gep_type_iterator gep_type_begin(const User *GEP)
ArrayRef< Instruction * > getDeadUsers() const
Access the dead users for this alloca.
Definition: SROA.cpp:242
static Value * foldSelectInst(SelectInst &SI)
Definition: SROA.cpp:582