1 //===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This transformation implements the well known scalar replacement of
10 /// aggregates transformation. It tries to identify promotable elements of an
11 /// aggregate alloca, and promote them to registers. It will also try to
12 /// convert uses of an element (or set of elements) of an alloca into a vector
13 /// or bitfield-style integer scalar if appropriate.
14 ///
15 /// It works to do this with minimal slicing of the alloca so that regions
16 /// which are merely transferred in and out of external memory remain unchanged
17 /// and are not decomposed to scalar code.
18 ///
19 /// Because this also performs alloca promotion, it can be thought of as also
20 /// serving the purpose of SSA formation. The algorithm iterates on the
21 /// function until all opportunities for promotion have been realized.
22 ///
23 //===----------------------------------------------------------------------===//
24 
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SetVector.h"
33 #include "llvm/ADT/SmallPtrSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/StringRef.h"
37 #include "llvm/ADT/Twine.h"
38 #include "llvm/ADT/iterator.h"
42 #include "llvm/Analysis/Loads.h"
45 #include "llvm/Config/llvm-config.h"
46 #include "llvm/IR/BasicBlock.h"
47 #include "llvm/IR/Constant.h"
48 #include "llvm/IR/ConstantFolder.h"
49 #include "llvm/IR/Constants.h"
50 #include "llvm/IR/DIBuilder.h"
51 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
57 #include "llvm/IR/GlobalAlias.h"
58 #include "llvm/IR/IRBuilder.h"
59 #include "llvm/IR/InstVisitor.h"
60 #include "llvm/IR/InstrTypes.h"
61 #include "llvm/IR/Instruction.h"
62 #include "llvm/IR/Instructions.h"
63 #include "llvm/IR/IntrinsicInst.h"
64 #include "llvm/IR/Intrinsics.h"
65 #include "llvm/IR/LLVMContext.h"
66 #include "llvm/IR/Metadata.h"
67 #include "llvm/IR/Module.h"
68 #include "llvm/IR/Operator.h"
69 #include "llvm/IR/PassManager.h"
70 #include "llvm/IR/Type.h"
71 #include "llvm/IR/Use.h"
72 #include "llvm/IR/User.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/Pass.h"
75 #include "llvm/Support/Casting.h"
77 #include "llvm/Support/Compiler.h"
78 #include "llvm/Support/Debug.h"
82 #include "llvm/Transforms/Scalar.h"
84 #include <algorithm>
85 #include <cassert>
86 #include <chrono>
87 #include <cstddef>
88 #include <cstdint>
89 #include <cstring>
90 #include <iterator>
91 #include <string>
92 #include <tuple>
93 #include <utility>
94 #include <vector>
95 
96 #ifndef NDEBUG
97 // We only use this for a debug check.
98 #include <random>
99 #endif
100 
101 using namespace llvm;
102 using namespace llvm::sroa;
103 
104 #define DEBUG_TYPE "sroa"
105 
106 STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
107 STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
108 STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
109 STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
110 STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
111 STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
112 STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
113 STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
114 STATISTIC(NumDeleted, "Number of instructions deleted");
115 STATISTIC(NumVectorized, "Number of vectorized aggregates");
116 
117 /// Hidden option to enable randomly shuffling the slices to help uncover
118 /// instability in their order.
119 static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices",
120  cl::init(false), cl::Hidden);
121 
122 /// Hidden option to experiment with completely strict handling of inbounds
123 /// GEPs.
124 static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
125  cl::Hidden);
126 
127 namespace {
128 
129 /// A custom IRBuilder inserter which prefixes all names, but only in
130 /// Assert builds.
131 class IRBuilderPrefixedInserter : public IRBuilderDefaultInserter {
132  std::string Prefix;
133 
134  const Twine getNameWithPrefix(const Twine &Name) const {
135  return Name.isTriviallyEmpty() ? Name : Prefix + Name;
136  }
137 
138 public:
139  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
140 
141 protected:
142  void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
143  BasicBlock::iterator InsertPt) const {
144  IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name), BB,
145  InsertPt);
146  }
147 };
148 
149 /// Provide a type for IRBuilder that drops names in release builds.
150 using IRBuilderTy = llvm::IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
151 
152 /// A used slice of an alloca.
153 ///
154 /// This structure represents a slice of an alloca used by some instruction. It
155 /// stores both the begin and end offsets of this use, a pointer to the use
156 /// itself, and a flag indicating whether we can classify the use as splittable
157 /// or not when forming partitions of the alloca.
158 class Slice {
159  /// The beginning offset of the range.
160  uint64_t BeginOffset = 0;
161 
162  /// The ending offset, not included in the range.
163  uint64_t EndOffset = 0;
164 
165  /// Storage for both the use of this slice and whether it can be
166  /// split.
167  PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
168 
169 public:
170  Slice() = default;
171 
172  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
173  : BeginOffset(BeginOffset), EndOffset(EndOffset),
174  UseAndIsSplittable(U, IsSplittable) {}
175 
176  uint64_t beginOffset() const { return BeginOffset; }
177  uint64_t endOffset() const { return EndOffset; }
178 
179  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
180  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
181 
182  Use *getUse() const { return UseAndIsSplittable.getPointer(); }
183 
184  bool isDead() const { return getUse() == nullptr; }
185  void kill() { UseAndIsSplittable.setPointer(nullptr); }
186 
187  /// Support for ordering ranges.
188  ///
189  /// This provides an ordering over ranges such that start offsets are
190  /// always increasing, and within equal start offsets, the end offsets are
191  /// decreasing. Thus the spanning range comes first in a cluster with the
192  /// same start position.
193  bool operator<(const Slice &RHS) const {
194  if (beginOffset() < RHS.beginOffset())
195  return true;
196  if (beginOffset() > RHS.beginOffset())
197  return false;
198  if (isSplittable() != RHS.isSplittable())
199  return !isSplittable();
200  if (endOffset() > RHS.endOffset())
201  return true;
202  return false;
203  }
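  // As an illustration (hypothetical slices): A = [0,16) splittable,
  // B = [0,8) splittable, and C = [0,8) unsplittable sort as C, A, B. The
  // unsplittable slice wins the tie on begin offset, and among equally
  // splittable slices the larger end offset (the spanning slice) comes first.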
204 
205  /// Support comparison with a single offset to allow binary searches.
206  friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
207  uint64_t RHSOffset) {
208  return LHS.beginOffset() < RHSOffset;
209  }
210  friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
211  const Slice &RHS) {
212  return LHSOffset < RHS.beginOffset();
213  }
214 
215  bool operator==(const Slice &RHS) const {
216  return isSplittable() == RHS.isSplittable() &&
217  beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
218  }
219  bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
220 };
221 
222 } // end anonymous namespace
223 
224 /// Representation of the alloca slices.
225 ///
226 /// This class represents the slices of an alloca which are formed by its
227 /// various uses. If a pointer escapes, we can't fully build a representation
228 /// for the slices used and we reflect that in this structure. The uses are
229 /// stored, sorted by increasing beginning offset and with unsplittable slices
230 /// starting at a particular offset before splittable slices.
231 class llvm::sroa::AllocaSlices {
232 public:
233  /// Construct the slices of a particular alloca.
234  AllocaSlices(const DataLayout &DL, AllocaInst &AI);
235 
236  /// Test whether a pointer to the allocation escapes our analysis.
237  ///
238  /// If this is true, the slices are never fully built and should be
239  /// ignored.
240  bool isEscaped() const { return PointerEscapingInstr; }
241 
242  /// Support for iterating over the slices.
243  /// @{
244   using iterator = SmallVectorImpl<Slice>::iterator;
245   using range = iterator_range<iterator>;
246 
247  iterator begin() { return Slices.begin(); }
248  iterator end() { return Slices.end(); }
249 
250   using const_iterator = SmallVectorImpl<Slice>::const_iterator;
251   using const_range = iterator_range<const_iterator>;
252 
253  const_iterator begin() const { return Slices.begin(); }
254  const_iterator end() const { return Slices.end(); }
255  /// @}
256 
257  /// Erase a range of slices.
258  void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
259 
260  /// Insert new slices for this alloca.
261  ///
262  /// This moves the slices into the alloca's slices collection, and re-sorts
263  /// everything so that the usual ordering properties of the alloca's slices
264  /// hold.
265  void insert(ArrayRef<Slice> NewSlices) {
266  int OldSize = Slices.size();
267  Slices.append(NewSlices.begin(), NewSlices.end());
268  auto SliceI = Slices.begin() + OldSize;
269  llvm::sort(SliceI, Slices.end());
270  std::inplace_merge(Slices.begin(), SliceI, Slices.end());
271  }
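  // A sketch of the merge above, with hypothetical slices: if Slices holds
  // [0,4) and [8,12) and NewSlices is {[2,6), [0,2)} (all splittable), the
  // appended tail is first sorted to {[0,2), [2,6)}, and inplace_merge then
  // yields [0,4), [0,2), [2,6), [8,12), restoring the ordering defined by
  // Slice::operator< (the spanning [0,4) precedes [0,2)).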
272 
273  // Forward declare the iterator and range accessor for walking the
274  // partitions.
275  class partition_iterator;
276   iterator_range<partition_iterator> partitions();
277 
278  /// Access the dead users for this alloca.
279  ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
280 
281  /// Access the dead operands referring to this alloca.
282  ///
283  /// These are operands which cannot actually be used to refer to the
284  /// alloca as they are outside its range and the user doesn't correct for
285  /// that. These mostly consist of PHI node inputs and the like which we just
286  /// need to replace with undef.
287  ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
288 
289 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
290  void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
291  void printSlice(raw_ostream &OS, const_iterator I,
292  StringRef Indent = " ") const;
293  void printUse(raw_ostream &OS, const_iterator I,
294  StringRef Indent = " ") const;
295  void print(raw_ostream &OS) const;
296  void dump(const_iterator I) const;
297  void dump() const;
298 #endif
299 
300 private:
301  template <typename DerivedT, typename RetT = void> class BuilderBase;
302  class SliceBuilder;
303 
304  friend class AllocaSlices::SliceBuilder;
305 
306 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
307  /// Handle to alloca instruction to simplify method interfaces.
308  AllocaInst &AI;
309 #endif
310 
311  /// The instruction responsible for this alloca not having a known set
312  /// of slices.
313  ///
314  /// When an instruction (potentially) escapes the pointer to the alloca, we
315  /// store a pointer to that here and abort trying to form slices of the
316  /// alloca. This will be null if the alloca slices are analyzed successfully.
317  Instruction *PointerEscapingInstr;
318 
319  /// The slices of the alloca.
320  ///
321  /// We store a vector of the slices formed by uses of the alloca here. This
322  /// vector is sorted by increasing begin offset, and then the unsplittable
323  /// slices before the splittable ones. See the Slice inner class for more
324  /// details.
325  SmallVector<Slice, 8> Slices;
326 
327  /// Instructions which will become dead if we rewrite the alloca.
328  ///
329  /// Note that these are not separated by slice. This is because we expect an
330  /// alloca to be completely rewritten or not rewritten at all. If rewritten,
331  /// all these instructions can simply be removed and replaced with undef as
332  /// they come from outside of the allocated space.
333   SmallVector<Instruction *, 8> DeadUsers;
334 
335  /// Operands which will become dead if we rewrite the alloca.
336  ///
337  /// These are operands that in their particular use can be replaced with
338  /// undef when we rewrite the alloca. These show up in out-of-bounds inputs
339  /// to PHI nodes and the like. They aren't entirely dead (there might be
340  /// a GEP back into the bounds using it elsewhere), and neither is the PHI, but we
341  /// want to swap this particular input for undef to simplify the use lists of
342  /// the alloca.
343  SmallVector<Use *, 8> DeadOperands;
344 };
345 
346 /// A partition of the slices.
347 ///
348 /// An ephemeral representation for a range of slices which can be viewed as
349 /// a partition of the alloca. This range represents a span of the alloca's
350 /// memory which cannot be split, and provides access to all of the slices
351 /// overlapping some part of the partition.
352 ///
353 /// Objects of this type are produced by traversing the alloca's slices, but
354 /// are only ephemeral and not persistent.
355 class llvm::sroa::Partition {
356 private:
357  friend class AllocaSlices;
358   friend class AllocaSlices::partition_iterator;
359 
360  using iterator = AllocaSlices::iterator;
361 
362  /// The beginning and ending offsets of the alloca for this
363  /// partition.
364  uint64_t BeginOffset, EndOffset;
365 
366  /// The start and end iterators of this partition.
367  iterator SI, SJ;
368 
369  /// A collection of split slice tails overlapping the partition.
370  SmallVector<Slice *, 4> SplitTails;
371 
372  /// Raw constructor builds an empty partition starting and ending at
373  /// the given iterator.
374  Partition(iterator SI) : SI(SI), SJ(SI) {}
375 
376 public:
377  /// The start offset of this partition.
378  ///
379  /// All of the contained slices start at or after this offset.
380  uint64_t beginOffset() const { return BeginOffset; }
381 
382  /// The end offset of this partition.
383  ///
384  /// All of the contained slices end at or before this offset.
385  uint64_t endOffset() const { return EndOffset; }
386 
387  /// The size of the partition.
388  ///
389  /// Note that this can never be zero.
390  uint64_t size() const {
391  assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
392  return EndOffset - BeginOffset;
393  }
394 
395  /// Test whether this partition contains no slices, and merely spans
396  /// a region occupied by split slices.
397  bool empty() const { return SI == SJ; }
398 
399  /// \name Iterate slices that start within the partition.
400  /// These may be splittable or unsplittable. They have a begin offset >= the
401  /// partition begin offset.
402  /// @{
403  // FIXME: We should probably define a "concat_iterator" helper and use that
404  // to stitch together pointee_iterators over the split tails and the
405  // contiguous iterators of the partition. That would give a much nicer
406  // interface here. We could then additionally expose filtered iterators for
407  // split, unsplit, and unsplittable slices based on the usage patterns.
408  iterator begin() const { return SI; }
409  iterator end() const { return SJ; }
410  /// @}
411 
412  /// Get the sequence of split slice tails.
413  ///
414  /// These tails are of slices which start before this partition but are
415  /// split and overlap into the partition. We accumulate these while forming
416  /// partitions.
417  ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
418 };
419 
420 /// An iterator over partitions of the alloca's slices.
421 ///
422 /// This iterator implements the core algorithm for partitioning the alloca's
423 /// slices. It is a forward iterator as we don't support backtracking for
424 /// efficiency reasons, and re-use a single storage area to maintain the
425 /// current set of split slices.
426 ///
427 /// It is templated on the slice iterator type to use so that it can operate
428 /// with either const or non-const slice iterators.
429 class AllocaSlices::partition_iterator
430     : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
431  Partition> {
432  friend class AllocaSlices;
433 
434  /// Most of the state for walking the partitions is held in a class
435  /// with a nice interface for examining them.
436  Partition P;
437 
438  /// We need to keep the end of the slices to know when to stop.
439   AllocaSlices::iterator SE;
440 
441  /// We also need to keep track of the maximum split end offset seen.
442  /// FIXME: Do we really?
443  uint64_t MaxSplitSliceEndOffset = 0;
444 
445  /// Sets the partition to be empty at given iterator, and sets the
446  /// end iterator.
447   partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
448       : P(SI), SE(SE) {
449  // If not already at the end, advance our state to form the initial
450  // partition.
451  if (SI != SE)
452  advance();
453  }
454 
455  /// Advance the iterator to the next partition.
456  ///
457  /// Requires that the iterator not be at the end of the slices.
458  void advance() {
459  assert((P.SI != SE || !P.SplitTails.empty()) &&
460  "Cannot advance past the end of the slices!");
461 
462  // Clear out any split uses which have ended.
463  if (!P.SplitTails.empty()) {
464  if (P.EndOffset >= MaxSplitSliceEndOffset) {
465  // If we've finished all splits, this is easy.
466  P.SplitTails.clear();
467  MaxSplitSliceEndOffset = 0;
468  } else {
469  // Remove the uses which have ended in the prior partition. This
470  // cannot change the max split slice end because we just checked that
471  // the prior partition ended prior to that max.
472  P.SplitTails.erase(llvm::remove_if(P.SplitTails,
473  [&](Slice *S) {
474  return S->endOffset() <=
475  P.EndOffset;
476  }),
477  P.SplitTails.end());
478  assert(llvm::any_of(P.SplitTails,
479  [&](Slice *S) {
480  return S->endOffset() == MaxSplitSliceEndOffset;
481  }) &&
482  "Could not find the current max split slice offset!");
483  assert(llvm::all_of(P.SplitTails,
484  [&](Slice *S) {
485  return S->endOffset() <= MaxSplitSliceEndOffset;
486  }) &&
487  "Max split slice end offset is not actually the max!");
488  }
489  }
490 
491  // If P.SI is already at the end, then we've cleared the split tail and
492  // now have an end iterator.
493  if (P.SI == SE) {
494  assert(P.SplitTails.empty() && "Failed to clear the split slices!");
495  return;
496  }
497 
498  // If we had a non-empty partition previously, set up the state for
499  // subsequent partitions.
500  if (P.SI != P.SJ) {
501  // Accumulate all the splittable slices which started in the old
502  // partition into the split list.
503  for (Slice &S : P)
504  if (S.isSplittable() && S.endOffset() > P.EndOffset) {
505  P.SplitTails.push_back(&S);
506  MaxSplitSliceEndOffset =
507  std::max(S.endOffset(), MaxSplitSliceEndOffset);
508  }
509 
510  // Start from the end of the previous partition.
511  P.SI = P.SJ;
512 
513  // If P.SI is now at the end, we at most have a tail of split slices.
514  if (P.SI == SE) {
515  P.BeginOffset = P.EndOffset;
516  P.EndOffset = MaxSplitSliceEndOffset;
517  return;
518  }
519 
520  // If we have split slices and the next slice is after a gap and is
521  // not splittable, immediately form an empty partition for the split
522  // slices up until the next slice begins.
523  if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
524  !P.SI->isSplittable()) {
525  P.BeginOffset = P.EndOffset;
526  P.EndOffset = P.SI->beginOffset();
527  return;
528  }
529  }
530 
531  // OK, we need to consume new slices. Set the end offset based on the
532  // current slice, and step SJ past it. The beginning offset of the
533  // partition is the beginning offset of the next slice unless we have
534  // pre-existing split slices that are continuing, in which case we begin
535  // at the prior end offset.
536  P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
537  P.EndOffset = P.SI->endOffset();
538  ++P.SJ;
539 
540  // There are two strategies to form a partition based on whether the
541  // partition starts with an unsplittable slice or a splittable slice.
542  if (!P.SI->isSplittable()) {
543  // When we're forming an unsplittable region, it must always start at
544  // the first slice and will extend through its end.
545  assert(P.BeginOffset == P.SI->beginOffset());
546 
547  // Form a partition including all of the overlapping slices with this
548  // unsplittable slice.
549  while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
550  if (!P.SJ->isSplittable())
551  P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
552  ++P.SJ;
553  }
554 
555  // We have a partition across a set of overlapping unsplittable
556  // slices.
557  return;
558  }
559 
560  // If we're starting with a splittable slice, then we need to form
561  // a synthetic partition spanning it and any other overlapping splittable
562  // slices.
563  assert(P.SI->isSplittable() && "Forming a splittable partition!");
564 
565  // Collect all of the overlapping splittable slices.
566  while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
567  P.SJ->isSplittable()) {
568  P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
569  ++P.SJ;
570  }
571 
572  // Back up P.EndOffset if we ended the span early when encountering an
573  // unsplittable slice. This synthesizes the early end offset of
574  // a partition spanning only splittable slices.
575  if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
576  assert(!P.SJ->isSplittable());
577  P.EndOffset = P.SJ->beginOffset();
578  }
579  }
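  // A worked example of advance(), using hypothetical slices: for the sorted
  // slices [0,8) unsplittable, [0,16) splittable, [8,12) unsplittable,
  // successive calls produce three partitions:
  //   [0,8)   - both slices beginning at 0; the splittable [0,16) slice
  //             extends past it, so its tail is carried forward,
  //   [8,12)  - the second unsplittable slice plus the [0,16) split tail,
  //   [12,16) - an "empty" partition covering only the remaining split tail.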
580 
581 public:
582  bool operator==(const partition_iterator &RHS) const {
583  assert(SE == RHS.SE &&
584  "End iterators don't match between compared partition iterators!");
585 
586  // The observed positions of partitions are marked by the P.SI iterator and
587  // the emptiness of the split slices. The latter is only relevant when
588  // P.SI == SE, as the end iterator will additionally have an empty split
589  // slices list, but the prior may have the same P.SI and a tail of split
590  // slices.
591  if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
592  assert(P.SJ == RHS.P.SJ &&
593  "Same set of slices formed two different sized partitions!");
594  assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
595  "Same slice position with differently sized non-empty split "
596  "slice tails!");
597  return true;
598  }
599  return false;
600  }
601 
602   partition_iterator &operator++() {
603     advance();
604  return *this;
605  }
606 
607  Partition &operator*() { return P; }
608 };
609 
610 /// A forward range over the partitions of the alloca's slices.
611 ///
612 /// This accesses an iterator range over the partitions of the alloca's
613 /// slices. It computes these partitions on the fly based on the overlapping
614 /// offsets of the slices and the ability to split them. It will visit "empty"
615 /// partitions to cover regions of the alloca only accessed via split
616 /// slices.
617 iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
618   return make_range(partition_iterator(begin(), end()),
619  partition_iterator(end(), end()));
620 }
621 
622 static Value *foldSelectInst(SelectInst &SI) {
623   // If the condition being selected on is a constant or the same value is
624  // being selected between, fold the select. Yes this does (rarely) happen
625  // early on.
626  if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
627  return SI.getOperand(1 + CI->isZero());
628  if (SI.getOperand(1) == SI.getOperand(2))
629  return SI.getOperand(1);
630 
631  return nullptr;
632 }
633 
634 /// A helper that folds a PHI node or a select.
635 static Value *foldPHINodeOrSelectInst(Instruction &I) {
636   if (PHINode *PN = dyn_cast<PHINode>(&I)) {
637  // If PN merges together the same value, return that value.
638  return PN->hasConstantValue();
639  }
640  return foldSelectInst(cast<SelectInst>(I));
641 }
642 
643 /// Builder for the alloca slices.
644 ///
645 /// This class builds a set of alloca slices by recursively visiting the uses
646 /// of an alloca and making a slice for each load and store at each offset.
647 class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
648   friend class PtrUseVisitor<SliceBuilder>;
649   friend class InstVisitor<SliceBuilder>;
650 
651   using Base = PtrUseVisitor<SliceBuilder>;
652 
653  const uint64_t AllocSize;
654  AllocaSlices &AS;
655 
656  SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
657   SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
658 
659  /// Set to de-duplicate dead instructions found in the use walk.
660  SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
661 
662 public:
663   SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
664       : PtrUseVisitor<SliceBuilder>(DL),
665         AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), AS(AS) {}
666 
667 private:
668  void markAsDead(Instruction &I) {
669  if (VisitedDeadInsts.insert(&I).second)
670  AS.DeadUsers.push_back(&I);
671  }
672 
673  void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
674  bool IsSplittable = false) {
675  // Completely skip uses which have a zero size or start either before or
676  // past the end of the allocation.
677  if (Size == 0 || Offset.uge(AllocSize)) {
678  LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
679  << Offset
680  << " which has zero size or starts outside of the "
681  << AllocSize << " byte alloca:\n"
682  << " alloca: " << AS.AI << "\n"
683  << " use: " << I << "\n");
684  return markAsDead(I);
685  }
686 
687  uint64_t BeginOffset = Offset.getZExtValue();
688  uint64_t EndOffset = BeginOffset + Size;
689 
690  // Clamp the end offset to the end of the allocation. Note that this is
691  // formulated to handle even the case where "BeginOffset + Size" overflows.
692  // This may appear superficially to be something we could ignore entirely,
693  // but that is not so! There may be widened loads or PHI-node uses where
694  // some instructions are dead but not others. We can't completely ignore
695  // them, and so have to record at least the information here.
696  assert(AllocSize >= BeginOffset); // Established above.
697  if (Size > AllocSize - BeginOffset) {
698  LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
699  << Offset << " to remain within the " << AllocSize
700  << " byte alloca:\n"
701  << " alloca: " << AS.AI << "\n"
702  << " use: " << I << "\n");
703  EndOffset = AllocSize;
704  }
705 
706  AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
707  }
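  // For example, with a hypothetical 64-byte alloca: a 16-byte use at offset
  // 56 is clamped to the slice [56,64), while a use at offset 64 or beyond,
  // or one of zero size, is recorded as dead instead of becoming a slice.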
708 
709  void visitBitCastInst(BitCastInst &BC) {
710  if (BC.use_empty())
711  return markAsDead(BC);
712 
713  return Base::visitBitCastInst(BC);
714  }
715 
716  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
717  if (ASC.use_empty())
718  return markAsDead(ASC);
719 
720  return Base::visitAddrSpaceCastInst(ASC);
721  }
722 
723  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
724  if (GEPI.use_empty())
725  return markAsDead(GEPI);
726 
727  if (SROAStrictInbounds && GEPI.isInBounds()) {
728  // FIXME: This is a manually un-factored variant of the basic code inside
729  // of GEPs with checking of the inbounds invariant specified in the
730  // langref in a very strict sense. If we ever want to enable
731  // SROAStrictInbounds, this code should be factored cleanly into
732  // PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
733  // by writing out the code here where we have the underlying allocation
734  // size readily available.
735  APInt GEPOffset = Offset;
736  const DataLayout &DL = GEPI.getModule()->getDataLayout();
737  for (gep_type_iterator GTI = gep_type_begin(GEPI),
738  GTE = gep_type_end(GEPI);
739  GTI != GTE; ++GTI) {
740  ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
741  if (!OpC)
742  break;
743 
744  // Handle a struct index, which adds its field offset to the pointer.
745  if (StructType *STy = GTI.getStructTypeOrNull()) {
746  unsigned ElementIdx = OpC->getZExtValue();
747  const StructLayout *SL = DL.getStructLayout(STy);
748  GEPOffset +=
749  APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
750  } else {
751  // For array or vector indices, scale the index by the size of the
752  // type.
753  APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
754  GEPOffset += Index * APInt(Offset.getBitWidth(),
755  DL.getTypeAllocSize(GTI.getIndexedType()));
756  }
757 
758  // If this index has computed an intermediate pointer which is not
759  // inbounds, then the result of the GEP is a poison value and we can
760  // delete it and all uses.
761  if (GEPOffset.ugt(AllocSize))
762  return markAsDead(GEPI);
763  }
764  }
765 
766  return Base::visitGetElementPtrInst(GEPI);
767  }
768 
769  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
770  uint64_t Size, bool IsVolatile) {
771  // We allow splitting of non-volatile loads and stores where the type is an
772  // integer type. These may be used to implement 'memcpy' or other "transfer
773  // of bits" patterns.
774  bool IsSplittable = Ty->isIntegerTy() && !IsVolatile;
775 
776  insertUse(I, Offset, Size, IsSplittable);
777  }
778 
779  void visitLoadInst(LoadInst &LI) {
780  assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
781  "All simple FCA loads should have been pre-split");
782 
783  if (!IsOffsetKnown)
784  return PI.setAborted(&LI);
785 
786  if (LI.isVolatile() &&
787  LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
788  return PI.setAborted(&LI);
789 
790  uint64_t Size = DL.getTypeStoreSize(LI.getType());
791  return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
792  }
793 
794  void visitStoreInst(StoreInst &SI) {
795  Value *ValOp = SI.getValueOperand();
796  if (ValOp == *U)
797  return PI.setEscapedAndAborted(&SI);
798  if (!IsOffsetKnown)
799  return PI.setAborted(&SI);
800 
801  if (SI.isVolatile() &&
802  SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
803  return PI.setAborted(&SI);
804 
805  uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
806 
807  // If this memory access can be shown to *statically* extend outside the
808  // bounds of the allocation, its behavior is undefined, so simply
809  // ignore it. Note that this is more strict than the generic clamping
810  // behavior of insertUse. We also try to handle cases which might run the
811  // risk of overflow.
812  // FIXME: We should instead consider the pointer to have escaped if this
813  // function is being instrumented for addressing bugs or race conditions.
814  if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
815  LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
816  << Offset << " which extends past the end of the "
817  << AllocSize << " byte alloca:\n"
818  << " alloca: " << AS.AI << "\n"
819  << " use: " << SI << "\n");
820  return markAsDead(SI);
821  }
822 
823  assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
824  "All simple FCA stores should have been pre-split");
825  handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
826  }
827 
828  void visitMemSetInst(MemSetInst &II) {
829  assert(II.getRawDest() == *U && "Pointer use is not the destination?");
830  ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
831  if ((Length && Length->getValue() == 0) ||
832  (IsOffsetKnown && Offset.uge(AllocSize)))
833  // Zero-length mem transfer intrinsics can be ignored entirely.
834  return markAsDead(II);
835 
836  if (!IsOffsetKnown)
837  return PI.setAborted(&II);
838 
839  // Don't replace this with a store with a different address space. TODO:
840  // Use a store with the casted new alloca?
841  if (II.isVolatile() && II.getDestAddressSpace() != DL.getAllocaAddrSpace())
842  return PI.setAborted(&II);
843 
844  insertUse(II, Offset, Length ? Length->getLimitedValue()
845  : AllocSize - Offset.getLimitedValue(),
846  (bool)Length);
847  }
848 
849  void visitMemTransferInst(MemTransferInst &II) {
850  ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
851  if (Length && Length->getValue() == 0)
852  // Zero-length mem transfer intrinsics can be ignored entirely.
853  return markAsDead(II);
854 
855  // Because we can visit these intrinsics twice, also check to see if the
856  // first time marked this instruction as dead. If so, skip it.
857  if (VisitedDeadInsts.count(&II))
858  return;
859 
860  if (!IsOffsetKnown)
861  return PI.setAborted(&II);
862 
863  // Don't replace this with a load/store with a different address space.
864  // TODO: Use a store with the casted new alloca?
865  if (II.isVolatile() &&
866  (II.getDestAddressSpace() != DL.getAllocaAddrSpace() ||
867  II.getSourceAddressSpace() != DL.getAllocaAddrSpace()))
868  return PI.setAborted(&II);
869 
870  // This side of the transfer is completely out-of-bounds, and so we can
871  // nuke the entire transfer. However, we also need to nuke the other side
872  // if already added to our partitions.
873  // FIXME: Yet another place we really should bypass this when
874  // instrumenting for ASan.
875  if (Offset.uge(AllocSize)) {
876     SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
877         MemTransferSliceMap.find(&II);
878  if (MTPI != MemTransferSliceMap.end())
879  AS.Slices[MTPI->second].kill();
880  return markAsDead(II);
881  }
882 
883  uint64_t RawOffset = Offset.getLimitedValue();
884  uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
885 
886  // Check for the special case where the same exact value is used for both
887  // source and dest.
888  if (*U == II.getRawDest() && *U == II.getRawSource()) {
889  // For non-volatile transfers this is a no-op.
890  if (!II.isVolatile())
891  return markAsDead(II);
892 
893  return insertUse(II, Offset, Size, /*IsSplittable=*/false);
894  }
895 
896  // If we have seen both source and destination for a mem transfer, then
897  // they both point to the same alloca.
898  bool Inserted;
899     SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
900     std::tie(MTPI, Inserted) =
901  MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
902  unsigned PrevIdx = MTPI->second;
903  if (!Inserted) {
904  Slice &PrevP = AS.Slices[PrevIdx];
905 
906  // Check if the begin offsets match and this is a non-volatile transfer.
907  // In that case, we can completely elide the transfer.
908  if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
909  PrevP.kill();
910  return markAsDead(II);
911  }
912 
913  // Otherwise we have an offset transfer within the same alloca. We can't
914  // split those.
915  PrevP.makeUnsplittable();
916  }
917 
918  // Insert the use now that we've fixed up the splittable nature.
919  insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
920 
921  // Check that we ended up with a valid index in the map.
922  assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
923  "Map index doesn't point back to a slice with this user.");
924  }
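  // Example of the double visit above: for a memcpy whose source and
  // destination are different pointers into the same alloca, this visitor
  // runs once per pointer operand. The first visit records a slice and
  // remembers its index in MemTransferSliceMap; the second visit finds that
  // entry and either elides the transfer entirely (a non-volatile copy whose
  // two sides begin at the same offset is a self-copy) or marks the earlier
  // slice unsplittable, since splitting only one side of an intra-alloca
  // copy would break the pairing.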
925 
926  // Disable SROA for any intrinsics except for lifetime invariants.
927  // FIXME: What about debug intrinsics? This matches old behavior, but
928  // doesn't make sense.
929  void visitIntrinsicInst(IntrinsicInst &II) {
930  if (!IsOffsetKnown)
931  return PI.setAborted(&II);
932 
933  if (II.isLifetimeStartOrEnd()) {
934  ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
935  uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
936  Length->getLimitedValue());
937  insertUse(II, Offset, Size, true);
938  return;
939  }
940 
941  Base::visitIntrinsicInst(II);
942  }
943 
944  Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
945  // We consider any PHI or select that results in a direct load or store of
946  // the same offset to be a viable use for slicing purposes. These uses
947  // are considered unsplittable and the size is the maximum loaded or stored
948  // size.
949     SmallPtrSet<Instruction *, 4> Visited;
950     SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
951     Visited.insert(Root);
952  Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
953  const DataLayout &DL = Root->getModule()->getDataLayout();
954  // If there are no loads or stores, the access is dead. We mark that as
955  // a size zero access.
956  Size = 0;
957  do {
958  Instruction *I, *UsedI;
959  std::tie(UsedI, I) = Uses.pop_back_val();
960 
961  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
962  Size = std::max(Size,
963  DL.getTypeStoreSize(LI->getType()).getFixedSize());
964  continue;
965  }
966  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
967  Value *Op = SI->getOperand(0);
968  if (Op == UsedI)
969  return SI;
970  Size = std::max(Size,
971  DL.getTypeStoreSize(Op->getType()).getFixedSize());
972  continue;
973  }
974 
975  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
976  if (!GEP->hasAllZeroIndices())
977  return GEP;
978  } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
979  !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
980  return I;
981  }
982 
983  for (User *U : I->users())
984  if (Visited.insert(cast<Instruction>(U)).second)
985  Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
986  } while (!Uses.empty());
987 
988  return nullptr;
989  }
990 
991  void visitPHINodeOrSelectInst(Instruction &I) {
992  assert(isa<PHINode>(I) || isa<SelectInst>(I));
993  if (I.use_empty())
994  return markAsDead(I);
995 
996  // TODO: We could use SimplifyInstruction here to fold PHINodes and
997   // SelectInsts. However, doing so requires changing the current
998  // dead-operand-tracking mechanism. For instance, suppose neither loading
999  // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
1000  // trap either. However, if we simply replace %U with undef using the
1001  // current dead-operand-tracking mechanism, "load (select undef, undef,
1002  // %other)" may trap because the select may return the first operand
1003  // "undef".
1004  if (Value *Result = foldPHINodeOrSelectInst(I)) {
1005  if (Result == *U)
1006  // If the result of the constant fold will be the pointer, recurse
1007  // through the PHI/select as if we had RAUW'ed it.
1008  enqueueUsers(I);
1009  else
1010  // Otherwise the operand to the PHI/select is dead, and we can replace
1011  // it with undef.
1012  AS.DeadOperands.push_back(U);
1013 
1014  return;
1015  }
1016 
1017  if (!IsOffsetKnown)
1018  return PI.setAborted(&I);
1019 
1020  // See if we already have computed info on this node.
1021  uint64_t &Size = PHIOrSelectSizes[&I];
1022  if (!Size) {
1023  // This is a new PHI/Select, check for an unsafe use of it.
1024  if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
1025  return PI.setAborted(UnsafeI);
1026  }
1027 
1028  // For PHI and select operands outside the alloca, we can't nuke the entire
1029  // phi or select -- the other side might still be relevant, so we special
1030  // case them here and use a separate structure to track the operands
1031  // themselves which should be replaced with undef.
1032  // FIXME: This should instead be escaped in the event we're instrumenting
1033  // for address sanitization.
1034  if (Offset.uge(AllocSize)) {
1035  AS.DeadOperands.push_back(U);
1036  return;
1037  }
1038 
1039  insertUse(I, Offset, Size);
1040  }
1041 
1042  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1043 
1044  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1045 
1046  /// Disable SROA entirely if there are unhandled users of the alloca.
1047  void visitInstruction(Instruction &I) { PI.setAborted(&I); }
1048 };
1049 
1050 AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1051     :
1052 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1053  AI(AI),
1054 #endif
1055  PointerEscapingInstr(nullptr) {
1056  SliceBuilder PB(DL, AI, *this);
1057  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
1058  if (PtrI.isEscaped() || PtrI.isAborted()) {
1059  // FIXME: We should sink the escape vs. abort info into the caller nicely,
1060  // possibly by just storing the PtrInfo in the AllocaSlices.
1061  PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1062  : PtrI.getAbortingInst();
1063  assert(PointerEscapingInstr && "Did not track a bad instruction");
1064  return;
1065  }
1066 
1067  Slices.erase(
1068  llvm::remove_if(Slices, [](const Slice &S) { return S.isDead(); }),
1069  Slices.end());
1070 
1071 #ifndef NDEBUG
1072   if (SROARandomShuffleSlices) {
1073     std::mt19937 MT(static_cast<unsigned>(
1074  std::chrono::system_clock::now().time_since_epoch().count()));
1075  std::shuffle(Slices.begin(), Slices.end(), MT);
1076  }
1077 #endif
1078 
1079  // Sort the uses. This arranges for the offsets to be in ascending order,
1080  // and the sizes to be in descending order.
1081  llvm::sort(Slices);
1082 }
1083 
1084 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1085 
1086 void AllocaSlices::print(raw_ostream &OS, const_iterator I,
1087  StringRef Indent) const {
1088  printSlice(OS, I, Indent);
1089  OS << "\n";
1090  printUse(OS, I, Indent);
1091 }
1092 
1093 void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
1094  StringRef Indent) const {
1095  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
1096  << " slice #" << (I - begin())
1097  << (I->isSplittable() ? " (splittable)" : "");
1098 }
1099 
1100 void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
1101  StringRef Indent) const {
1102  OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
1103 }
1104 
1105 void AllocaSlices::print(raw_ostream &OS) const {
1106  if (PointerEscapingInstr) {
1107  OS << "Can't analyze slices for alloca: " << AI << "\n"
1108  << " A pointer to this alloca escaped by:\n"
1109  << " " << *PointerEscapingInstr << "\n";
1110  return;
1111  }
1112 
1113  OS << "Slices of alloca: " << AI << "\n";
1114  for (const_iterator I = begin(), E = end(); I != E; ++I)
1115  print(OS, I);
1116 }
1117 
1118 LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
1119  print(dbgs(), I);
1120 }
1121 LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1122 
1123 #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1124 
1125 /// Walk the range of a partitioning looking for a common type to cover this
1126 /// sequence of slices.
1127 static Type *findCommonType(AllocaSlices::const_iterator B,
1128                             AllocaSlices::const_iterator E,
1129                             uint64_t EndOffset) {
1130  Type *Ty = nullptr;
1131  bool TyIsCommon = true;
1132  IntegerType *ITy = nullptr;
1133 
1134  // Note that we need to look at *every* alloca slice's Use to ensure we
1135  // always get consistent results regardless of the order of slices.
1136  for (AllocaSlices::const_iterator I = B; I != E; ++I) {
1137  Use *U = I->getUse();
1138  if (isa<IntrinsicInst>(*U->getUser()))
1139  continue;
1140  if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1141  continue;
1142 
1143  Type *UserTy = nullptr;
1144  if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1145  UserTy = LI->getType();
1146  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1147  UserTy = SI->getValueOperand()->getType();
1148  }
1149 
1150  if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1151  // If the type is larger than the partition, skip it. We only encounter
1152  // this for split integer operations where we want to use the type of the
1153  // entity causing the split. Also skip if the type is not a byte width
1154  // multiple.
1155  if (UserITy->getBitWidth() % 8 != 0 ||
1156  UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1157  continue;
1158 
1159  // Track the largest bitwidth integer type used in this way in case there
1160  // is no common type.
1161  if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
1162  ITy = UserITy;
1163  }
1164 
1165  // To avoid depending on the order of slices, Ty and TyIsCommon must not
1166  // depend on types skipped above.
1167  if (!UserTy || (Ty && Ty != UserTy))
1168  TyIsCommon = false; // Give up on anything but an iN type.
1169  else
1170  Ty = UserTy;
1171  }
1172 
1173  return TyIsCommon ? Ty : ITy;
1174 }
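// For example, for a hypothetical 8-byte partition loaded both as an i64 and
// as a double, there is no single common type, so the code above falls back
// to the widest byte-width integer type used, i64. If every covering load
// and store instead used double, that common type would be returned.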
1175 
1176 /// PHI instructions that use an alloca and are subsequently loaded can be
1177 /// rewritten to load both input pointers in the pred blocks and then PHI the
1178 /// results, allowing the load of the alloca to be promoted.
1179 /// From this:
1180 /// %P2 = phi [i32* %Alloca, i32* %Other]
1181 /// %V = load i32* %P2
1182 /// to:
1183 /// %V1 = load i32* %Alloca -> will be mem2reg'd
1184 /// ...
1185 /// %V2 = load i32* %Other
1186 /// ...
1187 /// %V = phi [i32 %V1, i32 %V2]
1188 ///
1189 /// We can do this to a select if its only uses are loads and if the operands
1190 /// to the select can be loaded unconditionally.
1191 ///
1192 /// FIXME: This should be hoisted into a generic utility, likely in
1193 /// Transforms/Utils/Local.h
1194 static bool isSafePHIToSpeculate(PHINode &PN) {
1195  const DataLayout &DL = PN.getModule()->getDataLayout();
1196 
1197  // For now, we can only do this promotion if the load is in the same block
1198  // as the PHI, and if there are no stores between the phi and load.
1199  // TODO: Allow recursive phi users.
1200  // TODO: Allow stores.
1201  BasicBlock *BB = PN.getParent();
1202  MaybeAlign MaxAlign;
1203  uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
1204  APInt MaxSize(APWidth, 0);
1205  bool HaveLoad = false;
1206  for (User *U : PN.users()) {
1207  LoadInst *LI = dyn_cast<LoadInst>(U);
1208  if (!LI || !LI->isSimple())
1209  return false;
1210 
1211  // For now we only allow loads in the same block as the PHI. This is
1212  // a common case that happens when instcombine merges two loads through
1213  // a PHI.
1214  if (LI->getParent() != BB)
1215  return false;
1216 
1217  // Ensure that there are no instructions between the PHI and the load that
1218  // could store.
1219  for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
1220  if (BBI->mayWriteToMemory())
1221  return false;
1222 
1223  uint64_t Size = DL.getTypeStoreSize(LI->getType());
1224  MaxAlign = std::max(MaxAlign, MaybeAlign(LI->getAlignment()));
1225  MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize;
1226  HaveLoad = true;
1227  }
1228 
1229  if (!HaveLoad)
1230  return false;
1231 
1232  // We can only transform this if it is safe to push the loads into the
1233  // predecessor blocks. The only thing to watch out for is that we can't put
1234  // a possibly trapping load in the predecessor if it is a critical edge.
1235  for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1236  Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
1237  Value *InVal = PN.getIncomingValue(Idx);
1238 
1239  // If the value is produced by the terminator of the predecessor (an
1240  // invoke) or it has side-effects, there is no valid place to put a load
1241  // in the predecessor.
1242  if (TI == InVal || TI->mayHaveSideEffects())
1243  return false;
1244 
1245  // If the predecessor has a single successor, then the edge isn't
1246  // critical.
1247  if (TI->getNumSuccessors() == 1)
1248  continue;
1249 
1250  // If this pointer is always safe to load, or if we can prove that there
1251  // is already a load in the block, then we can move the load to the pred
1252  // block.
1253  if (isSafeToLoadUnconditionally(InVal, MaxAlign, MaxSize, DL, TI))
1254  continue;
1255 
1256  return false;
1257  }
1258 
1259  return true;
1260 }
1261 
1262 static void speculatePHINodeLoads(PHINode &PN) {
1263  LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
1264 
1265  LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
1266  Type *LoadTy = SomeLoad->getType();
1267  IRBuilderTy PHIBuilder(&PN);
1268  PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
1269  PN.getName() + ".sroa.speculated");
1270 
1271  // Get the AA tags and alignment to use from one of the loads. It does not
1272  // matter which one we get and if any differ.
1273  AAMDNodes AATags;
1274  SomeLoad->getAAMetadata(AATags);
1275  const MaybeAlign Align = MaybeAlign(SomeLoad->getAlignment());
1276 
1277  // Rewrite all loads of the PN to use the new PHI.
1278  while (!PN.use_empty()) {
1279  LoadInst *LI = cast<LoadInst>(PN.user_back());
1280  LI->replaceAllUsesWith(NewPN);
1281  LI->eraseFromParent();
1282  }
1283 
1284  // Inject loads into all of the pred blocks.
1285  DenseMap<BasicBlock*, Value*> InjectedLoads;
1286  for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1287  BasicBlock *Pred = PN.getIncomingBlock(Idx);
1288  Value *InVal = PN.getIncomingValue(Idx);
1289 
1290  // A PHI node is allowed to have multiple (duplicated) entries for the same
1291  // basic block, as long as the value is the same. So if we already injected
1292  // a load in the predecessor, then we should reuse the same load for all
1293  // duplicated entries.
1294  if (Value* V = InjectedLoads.lookup(Pred)) {
1295  NewPN->addIncoming(V, Pred);
1296  continue;
1297  }
1298 
1299  Instruction *TI = Pred->getTerminator();
1300  IRBuilderTy PredBuilder(TI);
1301 
1302  LoadInst *Load = PredBuilder.CreateLoad(
1303  LoadTy, InVal,
1304  (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
1305  ++NumLoadsSpeculated;
1306  Load->setAlignment(Align);
1307  if (AATags)
1308  Load->setAAMetadata(AATags);
1309  NewPN->addIncoming(Load, Pred);
1310  InjectedLoads[Pred] = Load;
1311  }
1312 
1313  LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
1314  PN.eraseFromParent();
1315 }
1316 
1317 /// Select instructions that use an alloca and are subsequently loaded can be
1318 /// rewritten to load both input pointers and then select between the result,
1319 /// allowing the load of the alloca to be promoted.
1320 /// From this:
1321 /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
1322 /// %V = load i32* %P2
1323 /// to:
1324 /// %V1 = load i32* %Alloca -> will be mem2reg'd
1325 /// %V2 = load i32* %Other
1326 /// %V = select i1 %cond, i32 %V1, i32 %V2
1327 ///
1328 /// We can do this to a select if its only uses are loads and if the operand
1329 /// to the select can be loaded unconditionally.
1330 static bool isSafeSelectToSpeculate(SelectInst &SI) {
1331   Value *TValue = SI.getTrueValue();
1332  Value *FValue = SI.getFalseValue();
1333  const DataLayout &DL = SI.getModule()->getDataLayout();
1334 
1335  for (User *U : SI.users()) {
1336  LoadInst *LI = dyn_cast<LoadInst>(U);
1337  if (!LI || !LI->isSimple())
1338  return false;
1339 
1340  // Both operands to the select need to be dereferenceable, either
1341  // absolutely (e.g. allocas) or at this point because we can see other
1342  // accesses to it.
1343  if (!isSafeToLoadUnconditionally(TValue, LI->getType(),
1344  MaybeAlign(LI->getAlignment()), DL, LI))
1345  return false;
1346  if (!isSafeToLoadUnconditionally(FValue, LI->getType(),
1347  MaybeAlign(LI->getAlignment()), DL, LI))
1348  return false;
1349  }
1350 
1351  return true;
1352 }
1353 
1354 static void speculateSelectInstLoads(SelectInst &SI) {
1355   LLVM_DEBUG(dbgs() << "    original: " << SI << "\n");
1356 
1357  IRBuilderTy IRB(&SI);
1358  Value *TV = SI.getTrueValue();
1359  Value *FV = SI.getFalseValue();
1360  // Replace the loads of the select with a select of two loads.
1361  while (!SI.use_empty()) {
1362  LoadInst *LI = cast<LoadInst>(SI.user_back());
1363  assert(LI->isSimple() && "We only speculate simple loads");
1364 
1365  IRB.SetInsertPoint(LI);
1366  LoadInst *TL = IRB.CreateLoad(LI->getType(), TV,
1367  LI->getName() + ".sroa.speculate.load.true");
1368  LoadInst *FL = IRB.CreateLoad(LI->getType(), FV,
1369  LI->getName() + ".sroa.speculate.load.false");
1370  NumLoadsSpeculated += 2;
1371 
1372  // Transfer alignment and AA info if present.
1373  TL->setAlignment(MaybeAlign(LI->getAlignment()));
1374  FL->setAlignment(MaybeAlign(LI->getAlignment()));
1375 
1376  AAMDNodes Tags;
1377  LI->getAAMetadata(Tags);
1378  if (Tags) {
1379  TL->setAAMetadata(Tags);
1380  FL->setAAMetadata(Tags);
1381  }
1382 
1383  Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1384  LI->getName() + ".sroa.speculated");
1385 
1386  LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
1387  LI->replaceAllUsesWith(V);
1388  LI->eraseFromParent();
1389  }
1390  SI.eraseFromParent();
1391 }
1392 
1393 /// Build a GEP out of a base pointer and indices.
1394 ///
1395 /// This will return the BasePtr if that is valid, or build a new GEP
1396 /// instruction using the IRBuilder if GEP-ing is needed.
1397 static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
1398  SmallVectorImpl<Value *> &Indices, Twine NamePrefix) {
1399  if (Indices.empty())
1400  return BasePtr;
1401 
1402  // A single zero index is a no-op, so check for this and avoid building a GEP
1403  // in that case.
1404  if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
1405  return BasePtr;
1406 
1407  return IRB.CreateInBoundsGEP(BasePtr->getType()->getPointerElementType(),
1408  BasePtr, Indices, NamePrefix + "sroa_idx");
1409 }
1410 
1411 /// Get a natural GEP off of the BasePtr walking through Ty toward
1412 /// TargetTy without changing the offset of the pointer.
1413 ///
1414 /// This routine assumes we've already established a properly offset GEP with
1415 /// Indices, and arrived at the Ty type. The goal is to continue to GEP with
1416 /// zero-indices down through type layers until we find one the same as
1417 /// TargetTy. If we can't find one with the same type, we at least try to use
1418 /// one with the same size. If none of that works, we just produce the GEP as
1419 /// indicated by Indices to have the correct offset.
1420 static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
1421  Value *BasePtr, Type *Ty, Type *TargetTy,
1422  SmallVectorImpl<Value *> &Indices,
1423  Twine NamePrefix) {
1424  if (Ty == TargetTy)
1425  return buildGEP(IRB, BasePtr, Indices, NamePrefix);
1426 
1427  // Offset size to use for the indices.
1428  unsigned OffsetSize = DL.getIndexTypeSizeInBits(BasePtr->getType());
1429 
1430  // See if we can descend into a struct and locate a field with the correct
1431  // type.
1432  unsigned NumLayers = 0;
1433  Type *ElementTy = Ty;
1434  do {
1435  if (ElementTy->isPointerTy())
1436  break;
1437 
1438  if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
1439  ElementTy = ArrayTy->getElementType();
1440  Indices.push_back(IRB.getIntN(OffsetSize, 0));
1441  } else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
1442  ElementTy = VectorTy->getElementType();
1443  Indices.push_back(IRB.getInt32(0));
1444  } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
1445  if (STy->element_begin() == STy->element_end())
1446  break; // Nothing left to descend into.
1447  ElementTy = *STy->element_begin();
1448  Indices.push_back(IRB.getInt32(0));
1449  } else {
1450  break;
1451  }
1452  ++NumLayers;
1453  } while (ElementTy != TargetTy);
1454  if (ElementTy != TargetTy)
1455  Indices.erase(Indices.end() - NumLayers, Indices.end());
1456 
1457  return buildGEP(IRB, BasePtr, Indices, NamePrefix);
1458 }
1459 
1460 /// Recursively compute indices for a natural GEP.
1461 ///
1462 /// This is the recursive step for getNaturalGEPWithOffset that walks down the
1463 /// element types adding appropriate indices for the GEP.
1464 static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
1465  Value *Ptr, Type *Ty, APInt &Offset,
1466  Type *TargetTy,
1467  SmallVectorImpl<Value *> &Indices,
1468  Twine NamePrefix) {
1469  if (Offset == 0)
1470  return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices,
1471  NamePrefix);
1472 
1473  // We can't recurse through pointer types.
1474  if (Ty->isPointerTy())
1475  return nullptr;
1476 
1477  // We try to analyze GEPs over vectors here, but note that these GEPs are
1478  // extremely poorly defined currently. The long-term goal is to remove GEPing
1479  // over a vector from the IR completely.
1480  if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
1481  unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType());
1482  if (ElementSizeInBits % 8 != 0) {
1483  // GEPs over non-multiple of 8 size vector elements are invalid.
1484  return nullptr;
1485  }
1486  APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
1487  APInt NumSkippedElements = Offset.sdiv(ElementSize);
1488  if (NumSkippedElements.ugt(VecTy->getNumElements()))
1489  return nullptr;
1490  Offset -= NumSkippedElements * ElementSize;
1491  Indices.push_back(IRB.getInt(NumSkippedElements));
1492  return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
1493  Offset, TargetTy, Indices, NamePrefix);
1494  }
1495 
1496  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
1497  Type *ElementTy = ArrTy->getElementType();
1498  APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
1499  APInt NumSkippedElements = Offset.sdiv(ElementSize);
1500  if (NumSkippedElements.ugt(ArrTy->getNumElements()))
1501  return nullptr;
1502 
1503  Offset -= NumSkippedElements * ElementSize;
1504  Indices.push_back(IRB.getInt(NumSkippedElements));
1505  return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
1506  Indices, NamePrefix);
1507  }
1508 
1509  StructType *STy = dyn_cast<StructType>(Ty);
1510  if (!STy)
1511  return nullptr;
1512 
1513  const StructLayout *SL = DL.getStructLayout(STy);
1514  uint64_t StructOffset = Offset.getZExtValue();
1515  if (StructOffset >= SL->getSizeInBytes())
1516  return nullptr;
1517  unsigned Index = SL->getElementContainingOffset(StructOffset);
1518  Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
1519  Type *ElementTy = STy->getElementType(Index);
1520  if (Offset.uge(DL.getTypeAllocSize(ElementTy)))
1521  return nullptr; // The offset points into alignment padding.
1522 
1523  Indices.push_back(IRB.getInt32(Index));
1524  return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
1525  Indices, NamePrefix);
1526 }
1527 
1528 /// Get a natural GEP from a base pointer to a particular offset and
1529 /// resulting in a particular type.
1530 ///
1531 /// The goal is to produce a "natural" looking GEP that works with the existing
1532 /// composite types to arrive at the appropriate offset and element type for
1533 /// a pointer. TargetTy is the element type the returned GEP should point-to if
1534 /// possible. We recurse by decreasing Offset, adding the appropriate index to
1535 /// Indices, and setting Ty to the result subtype.
1536 ///
1537 /// If no natural GEP can be constructed, this function returns null.
1538 static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
1539  Value *Ptr, APInt Offset, Type *TargetTy,
1540  SmallVectorImpl<Value *> &Indices,
1541  Twine NamePrefix) {
1542  PointerType *Ty = cast<PointerType>(Ptr->getType());
1543 
1544  // Don't consider any GEPs through an i8* as natural unless the TargetTy is
1545  // an i8.
1546  if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
1547  return nullptr;
1548 
1549  Type *ElementTy = Ty->getElementType();
1550  if (!ElementTy->isSized())
1551  return nullptr; // We can't GEP through an unsized element.
1552  APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
1553  if (ElementSize == 0)
1554  return nullptr; // Zero-length arrays can't help us build a natural GEP.
1555  APInt NumSkippedElements = Offset.sdiv(ElementSize);
1556 
1557  Offset -= NumSkippedElements * ElementSize;
1558  Indices.push_back(IRB.getInt(NumSkippedElements));
1559  return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
1560  Indices, NamePrefix);
1561 }
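// Illustrative walk-through (example values, not taken from this file): for a
// base pointer of type [8 x i32]* with Offset = 20 and TargetTy = i32, the
// outer call uses ElementSize = 32 (the whole array), so NumSkippedElements = 0
// and the first index is 0. The recursive step then sees the [8 x i32] array
// with element size 4, skips 20 / 4 = 5 elements leaving Offset = 0, and the
// final indices are [0, 5], i.e. roughly:
//   %gep = getelementptr inbounds [8 x i32], [8 x i32]* %base, i64 0, i64 5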
1562 
1563 /// Compute an adjusted pointer from Ptr by Offset bytes where the
1564 /// resulting pointer has PointerTy.
1565 ///
1566 /// This tries very hard to compute a "natural" GEP which arrives at the offset
1567 /// and produces the pointer type desired. Where it cannot, it will try to use
1568 /// the natural GEP to arrive at the offset and bitcast to the type. Where that
1569 /// fails, it will try to use an existing i8* and GEP to the byte offset and
1570 /// bitcast to the type.
1571 ///
1572 /// The strategy for finding the more natural GEPs is to peel off layers of the
1573 /// pointer, walking back through bit casts and GEPs, searching for a base
1574 /// pointer from which we can compute a natural GEP with the desired
1575 /// properties. The algorithm tries to fold as many constant indices into
1576 /// a single GEP as possible, thus making each GEP more independent of the
1577 /// surrounding code.
1578 static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
1579  APInt Offset, Type *PointerTy, Twine NamePrefix) {
1580  // Even though we don't look through PHI nodes, we could be called on an
1581  // instruction in an unreachable block, which may be on a cycle.
1582  SmallPtrSet<Value *, 4> Visited;
1583  Visited.insert(Ptr);
1584  SmallVector<Value *, 4> Indices;
1585 
1586  // We may end up computing an offset pointer that has the wrong type. If we
1587  // never are able to compute one directly that has the correct type, we'll
1588  // fall back to it, so keep it and the base it was computed from around here.
1589  Value *OffsetPtr = nullptr;
1590  Value *OffsetBasePtr;
1591 
1592  // Remember any i8 pointer we come across to re-use if we need to do a raw
1593  // byte offset.
1594  Value *Int8Ptr = nullptr;
1595  APInt Int8PtrOffset(Offset.getBitWidth(), 0);
1596 
1597  PointerType *TargetPtrTy = cast<PointerType>(PointerTy);
1598  Type *TargetTy = TargetPtrTy->getElementType();
1599 
1600  // Because an `addrspacecast` may be involved, `Ptr` (the storage pointer) may
1601  // have a different address space from the expected `PointerTy` (the pointer to
1602  // be used). Adjust the pointer type based on the original storage pointer.
1603  auto AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
1604  PointerTy = TargetTy->getPointerTo(AS);
1605 
1606  do {
1607  // First fold any existing GEPs into the offset.
1608  while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
1609  APInt GEPOffset(Offset.getBitWidth(), 0);
1610  if (!GEP->accumulateConstantOffset(DL, GEPOffset))
1611  break;
1612  Offset += GEPOffset;
1613  Ptr = GEP->getPointerOperand();
1614  if (!Visited.insert(Ptr).second)
1615  break;
1616  }
1617 
1618  // See if we can perform a natural GEP here.
1619  Indices.clear();
1620  if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
1621  Indices, NamePrefix)) {
1622  // If we have a new natural pointer at the offset, clear out any old
1623  // offset pointer we computed. Unless it is the base pointer or
1624  // a non-instruction, we built a GEP we don't need. Zap it.
1625  if (OffsetPtr && OffsetPtr != OffsetBasePtr)
1626  if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) {
1627  assert(I->use_empty() && "Built a GEP with uses somehow!");
1628  I->eraseFromParent();
1629  }
1630  OffsetPtr = P;
1631  OffsetBasePtr = Ptr;
1632  // If we also found a pointer of the right type, we're done.
1633  if (P->getType() == PointerTy)
1634  break;
1635  }
1636 
1637  // Stash this pointer if we've found an i8*.
1638  if (Ptr->getType()->isIntegerTy(8)) {
1639  Int8Ptr = Ptr;
1640  Int8PtrOffset = Offset;
1641  }
1642 
1643  // Peel off a layer of the pointer and update the offset appropriately.
1644  if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
1645  Ptr = cast<Operator>(Ptr)->getOperand(0);
1646  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
1647  if (GA->isInterposable())
1648  break;
1649  Ptr = GA->getAliasee();
1650  } else {
1651  break;
1652  }
1653  assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
1654  } while (Visited.insert(Ptr).second);
1655 
1656  if (!OffsetPtr) {
1657  if (!Int8Ptr) {
1658  Int8Ptr = IRB.CreateBitCast(
1659  Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()),
1660  NamePrefix + "sroa_raw_cast");
1661  Int8PtrOffset = Offset;
1662  }
1663 
1664  OffsetPtr = Int8PtrOffset == 0
1665  ? Int8Ptr
1666  : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
1667  IRB.getInt(Int8PtrOffset),
1668  NamePrefix + "sroa_raw_idx");
1669  }
1670  Ptr = OffsetPtr;
1671 
1672  // On the off chance we were targeting i8*, guard the bitcast here.
1673  if (cast<PointerType>(Ptr->getType()) != TargetPtrTy) {
1674  Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
1675  TargetPtrTy,
1676  NamePrefix + "sroa_cast");
1677  }
1678 
1679  return Ptr;
1680 }
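// Sketch of the fallback path (hypothetical value names, for illustration
// only): when no natural GEP of the right type is found, the function reuses or
// creates an i8* and indexes by raw bytes before casting, producing IR along
// the lines of:
//   %p.sroa_raw_cast = bitcast <ty>* %base to i8*
//   %p.sroa_raw_idx  = getelementptr inbounds i8, i8* %p.sroa_raw_cast, i64 <Offset>
//   %p.sroa_cast     = bitcast i8* %p.sroa_raw_idx to <TargetTy>*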
1681 
1682 /// Compute the adjusted alignment for a load or store from an offset.
1683 static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset,
1684  const DataLayout &DL) {
1685  unsigned Alignment;
1686  Type *Ty;
1687  if (auto *LI = dyn_cast<LoadInst>(I)) {
1688  Alignment = LI->getAlignment();
1689  Ty = LI->getType();
1690  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
1691  Alignment = SI->getAlignment();
1692  Ty = SI->getValueOperand()->getType();
1693  } else {
1694  llvm_unreachable("Only loads and stores are allowed!");
1695  }
1696 
1697  if (!Alignment)
1698  Alignment = DL.getABITypeAlignment(Ty);
1699 
1700  return MinAlign(Alignment, Offset);
1701 }
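// Worked example (values chosen for illustration): a load with alignment 16 at
// byte offset 12 within its partition yields MinAlign(16, 12) = 4, the largest
// power of two dividing both; an offset of 0 leaves the original alignment of
// 16 untouched.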
1702 
1703 /// Test whether we can convert a value from the old to the new type.
1704 ///
1705 /// This predicate should be used to guard calls to convertValue in order to
1706 /// ensure that we only try to convert viable values. The strategy is that we
1707 /// will peel off single element struct and array wrappings to get to an
1708 /// underlying value, and convert that value.
1709 static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
1710  if (OldTy == NewTy)
1711  return true;
1712 
1713  // For integer types, we can't handle any bit-width differences. This would
1714  // break both vector conversions with extension and introduce endianness
1715  // issues when combined with loads and stores.
1716  if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1717  assert(cast<IntegerType>(OldTy)->getBitWidth() !=
1718  cast<IntegerType>(NewTy)->getBitWidth() &&
1719  "We can't have the same bitwidth for different int types");
1720  return false;
1721  }
1722 
1723  if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
1724  return false;
1725  if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
1726  return false;
1727 
1728  // We can convert pointers to integers and vice-versa. Same for vectors
1729  // of pointers and integers.
1730  OldTy = OldTy->getScalarType();
1731  NewTy = NewTy->getScalarType();
1732  if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
1733  if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
1734  return cast<PointerType>(NewTy)->getPointerAddressSpace() ==
1735  cast<PointerType>(OldTy)->getPointerAddressSpace();
1736  }
1737 
1738  // We can convert integers to integral pointers, but not to non-integral
1739  // pointers.
1740  if (OldTy->isIntegerTy())
1741  return !DL.isNonIntegralPointerType(NewTy);
1742 
1743  // We can convert integral pointers to integers, but non-integral pointers
1744  // need to remain pointers.
1745  if (!DL.isNonIntegralPointerType(OldTy))
1746  return NewTy->isIntegerTy();
1747 
1748  return false;
1749  }
1750 
1751  return true;
1752 }
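// A few illustrative type pairs (assuming a typical 64-bit DataLayout with
// integral address-space-0 pointers; not an exhaustive list):
//   i64 <-> double            convertible (same size, both single-value types)
//   i64 <-> i8*               convertible (integer <-> integral pointer)
//   i32 <-> i64               not convertible (differing integer widths)
//   i8* <-> i8 addrspace(1)*  not convertible (address spaces differ)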
1753 
1754 /// Generic routine to convert an SSA value to a value of a different
1755 /// type.
1756 ///
1757 /// This will try various different casting techniques, such as bitcasts,
1758 /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
1759 /// two types for viability with this routine.
1760 static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
1761  Type *NewTy) {
1762  Type *OldTy = V->getType();
1763  assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
1764 
1765  if (OldTy == NewTy)
1766  return V;
1767 
1768  assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
1769  "Integer types must be the exact same to convert.");
1770 
1771  // See if we need inttoptr for this type pair. A cast involving both scalars
1772  // and vectors requires an additional bitcast.
1773  if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
1774  // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
1775  if (OldTy->isVectorTy() && !NewTy->isVectorTy())
1776  return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
1777  NewTy);
1778 
1779  // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
1780  if (!OldTy->isVectorTy() && NewTy->isVectorTy())
1781  return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
1782  NewTy);
1783 
1784  return IRB.CreateIntToPtr(V, NewTy);
1785  }
1786 
1787  // See if we need ptrtoint for this type pair. A cast involving both scalars
1788  // and vectors requires an additional bitcast.
1789  if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
1790  // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
1791  if (OldTy->isVectorTy() && !NewTy->isVectorTy())
1792  return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
1793  NewTy);
1794 
1795  // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
1796  if (!OldTy->isVectorTy() && NewTy->isVectorTy())
1797  return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
1798  NewTy);
1799 
1800  return IRB.CreatePtrToInt(V, NewTy);
1801  }
1802 
1803  return IRB.CreateBitCast(V, NewTy);
1804 }
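// For illustration (assuming 64-bit pointers): converting a <2 x i32> value %v
// to i8* takes the vector-to-scalar path above and emits roughly:
//   %tmp = bitcast <2 x i32> %v to i64
//   %ptr = inttoptr i64 %tmp to i8*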
1805 
1806 /// Test whether the given slice use can be promoted to a vector.
1807 ///
1808 /// This function is called to test each entry in a partition which is slated
1809 /// for a single slice.
1810 static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
1811  VectorType *Ty,
1812  uint64_t ElementSize,
1813  const DataLayout &DL) {
1814  // First validate the slice offsets.
1815  uint64_t BeginOffset =
1816  std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
1817  uint64_t BeginIndex = BeginOffset / ElementSize;
1818  if (BeginIndex * ElementSize != BeginOffset ||
1819  BeginIndex >= Ty->getNumElements())
1820  return false;
1821  uint64_t EndOffset =
1822  std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
1823  uint64_t EndIndex = EndOffset / ElementSize;
1824  if (EndIndex * ElementSize != EndOffset || EndIndex > Ty->getNumElements())
1825  return false;
1826 
1827  assert(EndIndex > BeginIndex && "Empty vector!");
1828  uint64_t NumElements = EndIndex - BeginIndex;
1829  Type *SliceTy = (NumElements == 1)
1830  ? Ty->getElementType()
1831  : VectorType::get(Ty->getElementType(), NumElements);
1832 
1833  Type *SplitIntTy =
1834  Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
1835 
1836  Use *U = S.getUse();
1837 
1838  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
1839  if (MI->isVolatile())
1840  return false;
1841  if (!S.isSplittable())
1842  return false; // Skip any unsplittable intrinsics.
1843  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
1844  if (!II->isLifetimeStartOrEnd())
1845  return false;
1846  } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
1847  // Disable vector promotion when there are loads or stores of an FCA.
1848  return false;
1849  } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1850  if (LI->isVolatile())
1851  return false;
1852  Type *LTy = LI->getType();
1853  if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
1854  assert(LTy->isIntegerTy());
1855  LTy = SplitIntTy;
1856  }
1857  if (!canConvertValue(DL, SliceTy, LTy))
1858  return false;
1859  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1860  if (SI->isVolatile())
1861  return false;
1862  Type *STy = SI->getValueOperand()->getType();
1863  if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
1864  assert(STy->isIntegerTy());
1865  STy = SplitIntTy;
1866  }
1867  if (!canConvertValue(DL, STy, SliceTy))
1868  return false;
1869  } else {
1870  return false;
1871  }
1872 
1873  return true;
1874 }
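// Worked example (illustrative numbers only): with Ty = <4 x i32>, ElementSize
// = 4 and a partition covering bytes [0, 16), a slice spanning bytes [4, 12)
// gives BeginIndex = 1 and EndIndex = 3, so NumElements = 2 and SliceTy =
// <2 x i32>; a slice spanning [4, 10) fails the check because 10 is not a
// multiple of the element size.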
1875 
1876 /// Test whether the given alloca partitioning and range of slices can be
1877 /// promoted to a vector.
1878 ///
1879 /// This is a quick test to check whether we can rewrite a particular alloca
1880 /// partition (and its newly formed alloca) into a vector alloca with only
1881 /// whole-vector loads and stores such that it could be promoted to a vector
1882 /// SSA value. We only can ensure this for a limited set of operations, and we
1883 /// don't want to do the rewrites unless we are confident that the result will
1884  /// be promotable, so we have an early test here.
1885 static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
1886  // Collect the candidate types for vector-based promotion. Also track whether
1887  // we have different element types.
1888  SmallVector<VectorType *, 4> CandidateTys;
1889  Type *CommonEltTy = nullptr;
1890  bool HaveCommonEltTy = true;
1891  auto CheckCandidateType = [&](Type *Ty) {
1892  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
1893  // If this vector type's total size in bits differs from the candidates collected so far, discard them all.
1894  if (!CandidateTys.empty()) {
1895  VectorType *V = CandidateTys[0];
1896  if (DL.getTypeSizeInBits(VTy) != DL.getTypeSizeInBits(V)) {
1897  CandidateTys.clear();
1898  return;
1899  }
1900  }
1901  CandidateTys.push_back(VTy);
1902  if (!CommonEltTy)
1903  CommonEltTy = VTy->getElementType();
1904  else if (CommonEltTy != VTy->getElementType())
1905  HaveCommonEltTy = false;
1906  }
1907  };
1908  // Consider any loads or stores that are the exact size of the slice.
1909  for (const Slice &S : P)
1910  if (S.beginOffset() == P.beginOffset() &&
1911  S.endOffset() == P.endOffset()) {
1912  if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
1913  CheckCandidateType(LI->getType());
1914  else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
1915  CheckCandidateType(SI->getValueOperand()->getType());
1916  }
1917 
1918  // If we didn't find a vector type, nothing to do here.
1919  if (CandidateTys.empty())
1920  return nullptr;
1921 
1922  // Remove non-integer vector types if we had multiple common element types.
1923  // FIXME: It'd be nice to replace them with integer vector types, but we can't
1924  // do that until all the backends are known to produce good code for all
1925  // integer vector types.
1926  if (!HaveCommonEltTy) {
1927  CandidateTys.erase(
1928  llvm::remove_if(CandidateTys,
1929  [](VectorType *VTy) {
1930  return !VTy->getElementType()->isIntegerTy();
1931  }),
1932  CandidateTys.end());
1933 
1934  // If there were no integer vector types, give up.
1935  if (CandidateTys.empty())
1936  return nullptr;
1937 
1938  // Rank the remaining candidate vector types. This is easy because we know
1939  // they're all integer vectors. We sort by ascending number of elements.
1940  auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
1941  (void)DL;
1942  assert(DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) &&
1943  "Cannot have vector types of different sizes!");
1944  assert(RHSTy->getElementType()->isIntegerTy() &&
1945  "All non-integer types eliminated!");
1946  assert(LHSTy->getElementType()->isIntegerTy() &&
1947  "All non-integer types eliminated!");
1948  return RHSTy->getNumElements() < LHSTy->getNumElements();
1949  };
1950  llvm::sort(CandidateTys, RankVectorTypes);
1951  CandidateTys.erase(
1952  std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
1953  CandidateTys.end());
1954  } else {
1955 // The only way to have the same element type in every vector type is to
1956 // have the same vector type. Check that and remove all but one.
1957 #ifndef NDEBUG
1958  for (VectorType *VTy : CandidateTys) {
1959  assert(VTy->getElementType() == CommonEltTy &&
1960  "Unaccounted for element type!");
1961  assert(VTy == CandidateTys[0] &&
1962  "Different vector types with the same element type!");
1963  }
1964 #endif
1965  CandidateTys.resize(1);
1966  }
1967 
1968  // Try each vector type, and return the one which works.
1969  auto CheckVectorTypeForPromotion = [&](VectorType *VTy) {
1970  uint64_t ElementSize = DL.getTypeSizeInBits(VTy->getElementType());
1971 
1972  // While the definition of LLVM vectors is bitpacked, we don't support element
1973  // sizes that aren't a multiple of a byte.
1974  if (ElementSize % 8)
1975  return false;
1976  assert((DL.getTypeSizeInBits(VTy) % 8) == 0 &&
1977  "vector size not a multiple of element size?");
1978  ElementSize /= 8;
1979 
1980  for (const Slice &S : P)
1981  if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL))
1982  return false;
1983 
1984  for (const Slice *S : P.splitSliceTails())
1985  if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL))
1986  return false;
1987 
1988  return true;
1989  };
1990  for (VectorType *VTy : CandidateTys)
1991  if (CheckVectorTypeForPromotion(VTy))
1992  return VTy;
1993 
1994  return nullptr;
1995 }
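// Example of the candidate selection (hypothetical slices, for illustration):
// if one whole-partition load uses <4 x i32> and another uses <8 x i16>, the
// element types differ, so only integer vector candidates are kept and they
// are ranked by ascending element count; <4 x i32> is tried first and wins if
// every slice passes isVectorPromotionViableForSlice for it.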
1996 
1997 /// Test whether a slice of an alloca is valid for integer widening.
1998 ///
1999 /// This implements the necessary checking for the \c isIntegerWideningViable
2000 /// test below on a single slice of the alloca.
2001 static bool isIntegerWideningViableForSlice(const Slice &S,
2002  uint64_t AllocBeginOffset,
2003  Type *AllocaTy,
2004  const DataLayout &DL,
2005  bool &WholeAllocaOp) {
2006  uint64_t Size = DL.getTypeStoreSize(AllocaTy);
2007 
2008  uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2009  uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2010 
2011  // We can't reasonably handle cases where the load or store extends past
2012  // the end of the alloca's type and into its padding.
2013  if (RelEnd > Size)
2014  return false;
2015 
2016  Use *U = S.getUse();
2017 
2018  if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2019  if (LI->isVolatile())
2020  return false;
2021  // We can't handle loads that extend past the allocated memory.
2022  if (DL.getTypeStoreSize(LI->getType()) > Size)
2023  return false;
2024  // So far, AllocaSliceRewriter does not support widening split slice tails
2025  // in rewriteIntegerLoad.
2026  if (S.beginOffset() < AllocBeginOffset)
2027  return false;
2028  // Note that we don't count vector loads or stores as whole-alloca
2029  // operations which enable integer widening because we would prefer to use
2030  // vector widening instead.
2031  if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
2032  WholeAllocaOp = true;
2033  if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
2034  if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy))
2035  return false;
2036  } else if (RelBegin != 0 || RelEnd != Size ||
2037  !canConvertValue(DL, AllocaTy, LI->getType())) {
2038  // Non-integer loads need to be convertible from the alloca type so that
2039  // they are promotable.
2040  return false;
2041  }
2042  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2043  Type *ValueTy = SI->getValueOperand()->getType();
2044  if (SI->isVolatile())
2045  return false;
2046  // We can't handle stores that extend past the allocated memory.
2047  if (DL.getTypeStoreSize(ValueTy) > Size)
2048  return false;
2049  // So far, AllocaSliceRewriter does not support widening split slice tails
2050  // in rewriteIntegerStore.
2051  if (S.beginOffset() < AllocBeginOffset)
2052  return false;
2053  // Note that we don't count vector loads or stores as whole-alloca
2054  // operations which enable integer widening because we would prefer to use
2055  // vector widening instead.
2056  if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
2057  WholeAllocaOp = true;
2058  if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2059  if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy))
2060  return false;
2061  } else if (RelBegin != 0 || RelEnd != Size ||
2062  !canConvertValue(DL, ValueTy, AllocaTy)) {
2063  // Non-integer stores need to be convertible to the alloca type so that
2064  // they are promotable.
2065  return false;
2066  }
2067  } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2068  if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
2069  return false;
2070  if (!S.isSplittable())
2071  return false; // Skip any unsplittable intrinsics.
2072  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2073  if (!II->isLifetimeStartOrEnd())
2074  return false;
2075  } else {
2076  return false;
2077  }
2078 
2079  return true;
2080 }
2081 
2082 /// Test whether the given alloca partition's integer operations can be
2083 /// widened to promotable ones.
2084 ///
2085 /// This is a quick test to check whether we can rewrite the integer loads and
2086 /// stores to a particular alloca into wider loads and stores and be able to
2087 /// promote the resulting alloca.
2088 static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
2089  const DataLayout &DL) {
2090  uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy);
2091  // Don't create integer types larger than the maximum bitwidth.
2092  if (SizeInBits > IntegerType::MAX_INT_BITS)
2093  return false;
2094 
2095  // Don't try to handle allocas with bit-padding.
2096  if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy))
2097  return false;
2098 
2099  // We need to ensure that an integer type with the appropriate bitwidth can
2100  // be converted to the alloca type, whatever that is. We don't want to force
2101  // the alloca itself to have an integer type if there is a more suitable one.
2102  Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
2103  if (!canConvertValue(DL, AllocaTy, IntTy) ||
2104  !canConvertValue(DL, IntTy, AllocaTy))
2105  return false;
2106 
2107  // While examining uses, we ensure that the alloca has a covering load or
2108  // store. We don't want to widen the integer operations only to fail to
2109  // promote due to some other unsplittable entry (which we may make splittable
2110  // later). However, if there are only splittable uses, go ahead and assume
2111  // that we cover the alloca.
2112  // FIXME: We shouldn't consider split slices that happen to start in the
2113  // partition here...
2114  bool WholeAllocaOp =
2115  P.begin() != P.end() ? false : DL.isLegalInteger(SizeInBits);
2116 
2117  for (const Slice &S : P)
2118  if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
2119  WholeAllocaOp))
2120  return false;
2121 
2122  for (const Slice *S : P.splitSliceTails())
2123  if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
2124  WholeAllocaOp))
2125  return false;
2126 
2127  return WholeAllocaOp;
2128 }
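// Illustrative case (not from this file): a partition covering a whole i64
// alloca accessed by an i64 load plus two i32 stores at offsets 0 and 4 is
// viable -- the i64 load sets WholeAllocaOp, and each i32 store stays within
// the 8-byte size -- whereas a partition accessed only through a partial i32
// load is rejected because no operation covers the whole partition.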
2129 
2130 static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2131  IntegerType *Ty, uint64_t Offset,
2132  const Twine &Name) {
2133  LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2134  IntegerType *IntTy = cast<IntegerType>(V->getType());
2135  assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
2136  "Element extends past full value");
2137  uint64_t ShAmt = 8 * Offset;
2138  if (DL.isBigEndian())
2139  ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
2140  if (ShAmt) {
2141  V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2142  LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2143  }
2144  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2145  "Cannot extract to a larger integer!");
2146  if (Ty != IntTy) {
2147  V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2148  LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n");
2149  }
2150  return V;
2151 }
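// Worked example (illustrative values): extracting an i16 at Offset = 2 from
// an i64 on a little-endian target shifts right by 8 * 2 = 16 bits and then
// truncates; on a big-endian target the shift is 8 * (8 - 2 - 2) = 32 bits,
// because the field's bytes sit toward the most significant end of the wide
// value.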
2152 
2153 static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
2154  Value *V, uint64_t Offset, const Twine &Name) {
2155  IntegerType *IntTy = cast<IntegerType>(Old->getType());
2156  IntegerType *Ty = cast<IntegerType>(V->getType());
2157  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2158  "Cannot insert a larger integer!");
2159  LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2160  if (Ty != IntTy) {
2161  V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2162  LLVM_DEBUG(dbgs() << " extended: " << *V << "\n");
2163  }
2164  assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
2165  "Element store outside of alloca store");
2166  uint64_t ShAmt = 8 * Offset;
2167  if (DL.isBigEndian())
2168  ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
2169  if (ShAmt) {
2170  V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2171  LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2172  }
2173 
2174  if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2175  APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2176  Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2177  LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n");
2178  V = IRB.CreateOr(Old, V, Name + ".insert");
2179  LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n");
2180  }
2181  return V;
2182 }
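// Worked example (illustrative values): inserting an i8 at Offset = 1 into an
// i32 on a little-endian target zero-extends the byte, shifts it left by 8,
// masks the old value with ~(0xFF << 8) = 0xFFFF00FF, and ORs the two results,
// so only bits [8, 16) of the wide value change.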
2183 
2184 static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
2185  unsigned EndIndex, const Twine &Name) {
2186  VectorType *VecTy = cast<VectorType>(V->getType());
2187  unsigned NumElements = EndIndex - BeginIndex;
2188  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2189 
2190  if (NumElements == VecTy->getNumElements())
2191  return V;
2192 
2193  if (NumElements == 1) {
2194  V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2195  Name + ".extract");
2196  LLVM_DEBUG(dbgs() << " extract: " << *V << "\n");
2197  return V;
2198  }
2199 
2200  SmallVector<Constant *, 8> Mask;
2201  Mask.reserve(NumElements);
2202  for (unsigned i = BeginIndex; i != EndIndex; ++i)
2203  Mask.push_back(IRB.getInt32(i));
2204  V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
2205  ConstantVector::get(Mask), Name + ".extract");
2206  LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2207  return V;
2208 }
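// For illustration: extracting elements [1, 3) from a <4 x i32> value uses a
// shufflevector with mask <i32 1, i32 2>, producing a <2 x i32> result; a
// single-element range instead becomes a plain extractelement, as handled
// above.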
2209 
2210 static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
2211  unsigned BeginIndex, const Twine &Name) {
2212  VectorType *VecTy = cast<VectorType>(Old->getType());
2213  assert(VecTy && "Can only insert a vector into a vector");
2214 
2215  VectorType *Ty = dyn_cast<VectorType>(V->getType());
2216  if (!Ty) {
2217  // Single element to insert.
2218  V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2219  Name + ".insert");
2220  LLVM_DEBUG(dbgs() << " insert: " << *V << "\n");
2221  return V;
2222  }
2223 
2224  assert(Ty->getNumElements() <= VecTy->getNumElements() &&
2225  "Too many elements!");
2226  if (Ty->getNumElements() == VecTy->getNumElements()) {
2227  assert(V->getType() == VecTy && "Vector type mismatch");
2228  return V;
2229  }
2230  unsigned EndIndex = BeginIndex + Ty->getNumElements();
2231 
2232  // When inserting a smaller vector into the larger one backing the store, we
2233  // first use a shuffle vector to widen it with undef elements, and then a
2234  // select to choose, element by element, between the incoming (widened)
2235  // vector and the loaded vector.
2236  SmallVector<Constant *, 8> Mask;
2237  Mask.reserve(VecTy->getNumElements());
2238  for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
2239  if (i >= BeginIndex && i < EndIndex)
2240  Mask.push_back(IRB.getInt32(i - BeginIndex));
2241  else
2242  Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
2243  V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
2244  ConstantVector::get(Mask), Name + ".expand");
2245  LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2246 
2247  Mask.clear();
2248  for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
2249  Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2250 
2251  V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend");
2252 
2253  LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
2254  return V;
2255 }
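// For illustration: inserting a <2 x i32> value at BeginIndex = 1 into a
// <4 x i32> first widens it with a shuffle mask of <undef, 0, 1, undef>, then
// selects lane by lane with the i1 mask <0, 1, 1, 0>, so lanes 1 and 2 come
// from the new value while lanes 0 and 3 are preserved from the old vector.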
2256 
2257 /// Visitor to rewrite instructions using a particular slice of an alloca
2258 /// to use a new alloca.
2259 ///
2260 /// Also implements the rewriting to vector-based accesses when the partition
2261 /// passes the isVectorPromotionViable predicate. Most of the rewriting logic
2262 /// lives here.
2263 class llvm::sroa::AllocaSliceRewriter
2264  : public InstVisitor<AllocaSliceRewriter, bool> {
2265  // Befriend the base class so it can delegate to private visit methods.
2266  friend class InstVisitor<AllocaSliceRewriter, bool>;
2267 
2268  using Base = InstVisitor<AllocaSliceRewriter, bool>;
2269 
2270  const DataLayout &DL;
2271  AllocaSlices &AS;
2272  SROA &Pass;
2273  AllocaInst &OldAI, &NewAI;
2274  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2275  Type *NewAllocaTy;
2276 
2277  // This is a convenience and flag variable that will be null unless the new
2278  // alloca's integer operations should be widened to this integer type due to
2279  // passing isIntegerWideningViable above. If it is non-null, the desired
2280  // integer type will be stored here for easy access during rewriting.
2281  IntegerType *IntTy;
2282 
2283  // If we are rewriting an alloca partition which can be written as pure
2284  // vector operations, we stash extra information here. When VecTy is
2285  // non-null, we have some strict guarantees about the rewritten alloca:
2286  // - The new alloca is exactly the size of the vector type here.
2287  // - The accesses all either map to the entire vector or to a single
2288  // element.
2289  // - The set of accessing instructions is only one of those handled above
2290  // in isVectorPromotionViable. Generally these are the same access kinds
2291  // which are promotable via mem2reg.
2292  VectorType *VecTy;
2293  Type *ElementTy;
2294  uint64_t ElementSize;
2295 
2296  // The original offset of the slice currently being rewritten relative to
2297  // the original alloca.
2298  uint64_t BeginOffset = 0;
2299  uint64_t EndOffset = 0;
2300 
2301  // The new offsets of the slice currently being rewritten relative to the
2302  // original alloca.
2303  uint64_t NewBeginOffset, NewEndOffset;
2304 
2305  uint64_t SliceSize;
2306  bool IsSplittable = false;
2307  bool IsSplit = false;
2308  Use *OldUse = nullptr;
2309  Instruction *OldPtr = nullptr;
2310 
2311  // Track post-rewrite users which are PHI nodes and Selects.
2312  SmallSetVector<PHINode *, 8> &PHIUsers;
2313  SmallSetVector<SelectInst *, 8> &SelectUsers;
2314 
2315  // Utility IR builder, whose name prefix is set up for each visited use, and
2316  // the insertion point is set to point to the user.
2317  IRBuilderTy IRB;
2318 
2319 public:
2320  AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
2321  AllocaInst &OldAI, AllocaInst &NewAI,
2322  uint64_t NewAllocaBeginOffset,
2323  uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
2324  VectorType *PromotableVecTy,
2325  SmallSetVector<PHINode *, 8> &PHIUsers,
2326  SmallSetVector<SelectInst *, 8> &SelectUsers)
2327  : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2328  NewAllocaBeginOffset(NewAllocaBeginOffset),
2329  NewAllocaEndOffset(NewAllocaEndOffset),
2330  NewAllocaTy(NewAI.getAllocatedType()),
2331  IntTy(IsIntegerPromotable
2332  ? Type::getIntNTy(
2333  NewAI.getContext(),
2334  DL.getTypeSizeInBits(NewAI.getAllocatedType()))
2335  : nullptr),
2336  VecTy(PromotableVecTy),
2337  ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2338  ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
2339  PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2340  IRB(NewAI.getContext(), ConstantFolder()) {
2341  if (VecTy) {
2342  assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
2343  "Only multiple-of-8 sized vector elements are viable");
2344  ++NumVectorized;
2345  }
2346  assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2347  }
2348 
2349  bool visit(AllocaSlices::const_iterator I) {
2350  bool CanSROA = true;
2351  BeginOffset = I->beginOffset();
2352  EndOffset = I->endOffset();
2353  IsSplittable = I->isSplittable();
2354  IsSplit =
2355  BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2356  LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
2357  LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
2358  LLVM_DEBUG(dbgs() << "\n");
2359 
2360  // Compute the intersecting offset range.
2361  assert(BeginOffset < NewAllocaEndOffset);
2362  assert(EndOffset > NewAllocaBeginOffset);
2363  NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2364  NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2365 
2366  SliceSize = NewEndOffset - NewBeginOffset;
2367 
2368  OldUse = I->getUse();
2369  OldPtr = cast<Instruction>(OldUse->get());
2370 
2371  Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2372  IRB.SetInsertPoint(OldUserI);
2373  IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2374  IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + ".");
2375 
2376  CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
2377  if (VecTy || IntTy)
2378  assert(CanSROA);
2379  return CanSROA;
2380  }
2381 
2382 private:
2383  // Make sure the other visit overloads are visible.
2384  using Base::visit;
2385 
2386  // Every instruction which can end up as a user must have a rewrite rule.
2387  bool visitInstruction(Instruction &I) {
2388  LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
2389  llvm_unreachable("No rewrite rule for this instruction!");
2390  }
2391 
2392  Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
2393  // Note that the offset computation can use BeginOffset or NewBeginOffset
2394  // interchangeably for unsplit slices.
2395  assert(IsSplit || BeginOffset == NewBeginOffset);
2396  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2397 
2398 #ifndef NDEBUG
2399  StringRef OldName = OldPtr->getName();
2400  // Skip through the last '.sroa.' component of the name.
2401  size_t LastSROAPrefix = OldName.rfind(".sroa.");
2402  if (LastSROAPrefix != StringRef::npos) {
2403  OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
2404  // Look for an SROA slice index.
2405  size_t IndexEnd = OldName.find_first_not_of("0123456789");
2406  if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
2407  // Strip the index and look for the offset.
2408  OldName = OldName.substr(IndexEnd + 1);
2409  size_t OffsetEnd = OldName.find_first_not_of("0123456789");
2410  if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
2411  // Strip the offset.
2412  OldName = OldName.substr(OffsetEnd + 1);
2413  }
2414  }
2415  // Strip any SROA suffixes as well.
2416  OldName = OldName.substr(0, OldName.find(".sroa_"));
2417 #endif
2418 
2419  return getAdjustedPtr(IRB, DL, &NewAI,
2420  APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
2421  PointerTy,
2422 #ifndef NDEBUG
2423  Twine(OldName) + "."
2424 #else
2425  Twine()
2426 #endif
2427  );
2428  }
2429 
2430  /// Compute suitable alignment to access this slice of the *new*
2431  /// alloca.
2432  ///
2433  /// You can optionally pass a type to this routine and if that type's ABI
2434  /// alignment is itself suitable, this will return zero.
2435  unsigned getSliceAlign(Type *Ty = nullptr) {
2436  unsigned NewAIAlign = NewAI.getAlignment();
2437  if (!NewAIAlign)
2438  NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType());
2439  unsigned Align =
2440  MinAlign(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
2441  return (Ty && Align == DL.getABITypeAlignment(Ty)) ? 0 : Align;
2442  }
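// Worked example (illustrative values): with a new alloca aligned to 8 bytes
// and a slice starting 4 bytes into it, MinAlign(8, 4) = 4; if the queried
// type's ABI alignment is also 4, the function returns 0 to signal that the
// natural alignment already suffices.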
2443 
2444  unsigned getIndex(uint64_t Offset) {
2445  assert(VecTy && "Can only call getIndex when rewriting a vector");
2446  uint64_t RelOffset = Offset - NewAllocaBeginOffset;
2447  assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
2448  uint32_t Index = RelOffset / ElementSize;
2449  assert(Index * ElementSize == RelOffset);
2450  return Index;
2451  }
2452 
2453  void deleteIfTriviallyDead(Value *V) {
2454  Instruction *I = cast<Instruction>(V);
2455  if (isInstructionTriviallyDead(I))
2456  Pass.DeadInsts.insert(I);
2457  }
2458 
2459  Value *rewriteVectorizedLoadInst() {
2460  unsigned BeginIndex = getIndex(NewBeginOffset);
2461  unsigned EndIndex = getIndex(NewEndOffset);
2462  assert(EndIndex > BeginIndex && "Empty vector!");
2463 
2464  Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2465  NewAI.getAlignment(), "load");
2466  return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
2467  }
2468 
2469  Value *rewriteIntegerLoad(LoadInst &LI) {
2470  assert(IntTy && "We cannot insert an integer to the alloca");
2471  assert(!LI.isVolatile());
2472  Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2473  NewAI.getAlignment(), "load");
2474  V = convertValue(DL, IRB, V, IntTy);
2475  assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
2476  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2477  if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
2478  IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
2479  V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
2480  }
2481  // It is possible that the extracted type is not the load type. This
2482  // happens if there is a load past the end of the alloca, and as
2483  // a consequence the slice is narrower but still a candidate for integer
2484  // lowering. To handle this case, we just zero extend the extracted
2485  // integer.
2486  assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
2487  "Can only handle an extract for an overly wide load");
2488  if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
2489  V = IRB.CreateZExt(V, LI.getType());
2490  return V;
2491  }
2492 
2493  bool visitLoadInst(LoadInst &LI) {
2494  LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
2495  Value *OldOp = LI.getOperand(0);
2496  assert(OldOp == OldPtr);
2497 
2498  AAMDNodes AATags;
2499  LI.getAAMetadata(AATags);
2500 
2501  unsigned AS = LI.getPointerAddressSpace();
2502 
2503  Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
2504  : LI.getType();
2505  const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize;
2506  bool IsPtrAdjusted = false;
2507  Value *V;
2508  if (VecTy) {
2509  V = rewriteVectorizedLoadInst();
2510  } else if (IntTy && LI.getType()->isIntegerTy()) {
2511  V = rewriteIntegerLoad(LI);
2512  } else if (NewBeginOffset == NewAllocaBeginOffset &&
2513  NewEndOffset == NewAllocaEndOffset &&
2514  (canConvertValue(DL, NewAllocaTy, TargetTy) ||
2515  (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
2516  TargetTy->isIntegerTy()))) {
2517  LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2518  NewAI.getAlignment(),
2519  LI.isVolatile(), LI.getName());
2520  if (AATags)
2521  NewLI->setAAMetadata(AATags);
2522  if (LI.isVolatile())
2523  NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
2524 
2525  // Any !nonnull metadata or !range metadata on the old load is also valid
2526  // on the new load. This is true in some cases even when the loads
2527  // are different types, for example by mapping !nonnull metadata to
2528  // !range metadata by modeling the null pointer constant converted to the
2529  // integer type.
2530  // FIXME: Add support for range metadata here. Currently the utilities
2531  // for this don't propagate range metadata in trivial cases from one
2532  // integer load to another, don't handle non-addrspace-0 null pointers
2533  // correctly, and don't have any support for mapping ranges as the
2534  // integer type becomes wider or narrower.
2535  if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
2536  copyNonnullMetadata(LI, N, *NewLI);
2537 
2538  // Try to preserve nonnull metadata
2539  V = NewLI;
2540 
2541  // If this is an integer load past the end of the slice (which means the
2542  // bytes outside the slice are undef or this load is dead) just forcibly
2543  // fix the integer size with correct handling of endianness.
2544  if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2545  if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
2546  if (AITy->getBitWidth() < TITy->getBitWidth()) {
2547  V = IRB.CreateZExt(V, TITy, "load.ext");
2548  if (DL.isBigEndian())
2549  V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
2550  "endian_shift");
2551  }
2552  } else {
2553  Type *LTy = TargetTy->getPointerTo(AS);
2554  LoadInst *NewLI = IRB.CreateAlignedLoad(
2555  TargetTy, getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(TargetTy),
2556  LI.isVolatile(), LI.getName());
2557  if (AATags)
2558  NewLI->setAAMetadata(AATags);
2559  if (LI.isVolatile())
2560  NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
2561 
2562  V = NewLI;
2563  IsPtrAdjusted = true;
2564  }
2565  V = convertValue(DL, IRB, V, TargetTy);
2566 
2567  if (IsSplit) {
2568  assert(!LI.isVolatile());
2569  assert(LI.getType()->isIntegerTy() &&
2570  "Only integer type loads and stores are split");
2571  assert(SliceSize < DL.getTypeStoreSize(LI.getType()) &&
2572  "Split load isn't smaller than original load");
2574  "Non-byte-multiple bit width");
2575  // Move the insertion point just past the load so that we can refer to it.
2576  IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI)));
2577  // Create a placeholder value with the same type as LI to use as the
2578  // basis for the new value. This allows us to replace the uses of LI with
2579  // the computed value, and then replace the placeholder with LI, leaving
2580  // LI only used for this computation.
2581  Value *Placeholder = new LoadInst(
2582  LI.getType(), UndefValue::get(LI.getType()->getPointerTo(AS)));
2583  V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
2584  "insert");
2585  LI.replaceAllUsesWith(V);
2586  Placeholder->replaceAllUsesWith(&LI);
2587  Placeholder->deleteValue();
2588  } else {
2589  LI.replaceAllUsesWith(V);
2590  }
2591 
2592  Pass.DeadInsts.insert(&LI);
2593  deleteIfTriviallyDead(OldOp);
2594  LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
2595  return !LI.isVolatile() && !IsPtrAdjusted;
2596  }
2597 
2598  bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
2599  AAMDNodes AATags) {
2600  if (V->getType() != VecTy) {
2601  unsigned BeginIndex = getIndex(NewBeginOffset);
2602  unsigned EndIndex = getIndex(NewEndOffset);
2603  assert(EndIndex > BeginIndex && "Empty vector!");
2604  unsigned NumElements = EndIndex - BeginIndex;
2605  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2606  Type *SliceTy = (NumElements == 1)
2607  ? ElementTy
2608  : VectorType::get(ElementTy, NumElements);
2609  if (V->getType() != SliceTy)
2610  V = convertValue(DL, IRB, V, SliceTy);
2611 
2612  // Mix in the existing elements.
2613  Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2614  NewAI.getAlignment(), "load");
2615  V = insertVector(IRB, Old, V, BeginIndex, "vec");
2616  }
2617  StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
2618  if (AATags)
2619  Store->setAAMetadata(AATags);
2620  Pass.DeadInsts.insert(&SI);
2621 
2622  LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
2623  return true;
2624  }
2625 
2626  bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
2627  assert(IntTy && "We cannot extract an integer from the alloca");
2628  assert(!SI.isVolatile());
2629  if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
2630  Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2631  NewAI.getAlignment(), "oldload");
2632  Old = convertValue(DL, IRB, Old, IntTy);
2633  assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
2634  uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
2635  V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert");
2636  }
2637  V = convertValue(DL, IRB, V, NewAllocaTy);
2638  StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
2639  Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
2640  LLVMContext::MD_access_group});
2641  if (AATags)
2642  Store->setAAMetadata(AATags);
2643  Pass.DeadInsts.insert(&SI);
2644  LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
2645  return true;
2646  }
2647 
2648  bool visitStoreInst(StoreInst &SI) {
2649  LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
2650  Value *OldOp = SI.getOperand(1);
2651  assert(OldOp == OldPtr);
2652 
2653  AAMDNodes AATags;
2654  SI.getAAMetadata(AATags);
2655 
2656  Value *V = SI.getValueOperand();
2657 
2658  // Strip all inbounds GEPs and pointer casts to try to dig out any root
2659  // alloca that should be re-examined after promoting this alloca.
2660  if (V->getType()->isPointerTy())
2661  if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
2662  Pass.PostPromotionWorklist.insert(AI);
2663 
2664  if (SliceSize < DL.getTypeStoreSize(V->getType())) {
2665  assert(!SI.isVolatile());
2666  assert(V->getType()->isIntegerTy() &&
2667  "Only integer type loads and stores are split");
2669  "Non-byte-multiple bit width");
2670  IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
2671  V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
2672  "extract");
2673  }
2674 
2675  if (VecTy)
2676  return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
2677  if (IntTy && V->getType()->isIntegerTy())
2678  return rewriteIntegerStore(V, SI, AATags);
2679 
2680  const bool IsStorePastEnd = DL.getTypeStoreSize(V->getType()) > SliceSize;
2681  StoreInst *NewSI;
2682  if (NewBeginOffset == NewAllocaBeginOffset &&
2683  NewEndOffset == NewAllocaEndOffset &&
2684  (canConvertValue(DL, V->getType(), NewAllocaTy) ||
2685  (IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
2686  V->getType()->isIntegerTy()))) {
2687  // If this is an integer store past the end of the slice (and thus the bytes
2688  // past that point are irrelevant or this is unreachable), truncate the
2689  // value prior to storing.
2690  if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
2691  if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2692  if (VITy->getBitWidth() > AITy->getBitWidth()) {
2693  if (DL.isBigEndian())
2694  V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
2695  "endian_shift");
2696  V = IRB.CreateTrunc(V, AITy, "load.trunc");
2697  }
2698 
2699  V = convertValue(DL, IRB, V, NewAllocaTy);
2700  NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
2701  SI.isVolatile());
2702  } else {
2703  unsigned AS = SI.getPointerAddressSpace();
2704  Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo(AS));
2705  NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
2706  SI.isVolatile());
2707  }
2708  NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
2709  LLVMContext::MD_access_group});
2710  if (AATags)
2711  NewSI->setAAMetadata(AATags);
2712  if (SI.isVolatile())
2713  NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
2714  Pass.DeadInsts.insert(&SI);
2715  deleteIfTriviallyDead(OldOp);
2716 
2717  LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
2718  return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
2719  }
2720 
2721  /// Compute an integer value from splatting an i8 across the given
2722  /// number of bytes.
2723  ///
2724  /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
2725  /// call this routine.
2726  /// FIXME: Heed the advice above.
2727  ///
2728  /// \param V The i8 value to splat.
2729  /// \param Size The number of bytes in the output (assuming i8 is one byte)
2730  Value *getIntegerSplat(Value *V, unsigned Size) {
2731  assert(Size > 0 && "Expected a positive number of bytes.");
2732  IntegerType *VTy = cast<IntegerType>(V->getType());
2733  assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
2734  if (Size == 1)
2735  return V;
2736 
2737  Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
2738  V = IRB.CreateMul(
2739  IRB.CreateZExt(V, SplatIntTy, "zext"),
2740  ConstantExpr::getUDiv(
2741  Constant::getAllOnesValue(SplatIntTy),
2742  ConstantExpr::getZExt(Constant::getAllOnesValue(V->getType()),
2743  SplatIntTy)),
2744  "isplat");
2745  return V;
2746  }
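// Worked example (illustrative values): splatting the byte 0xAB across 4 bytes
// zero-extends it to i32 and multiplies by 0xFFFFFFFF / 0xFF = 0x01010101,
// giving 0xABABABAB; the division of the two all-ones constants is what builds
// the 0x0101... replication factor in the code above.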
2747 
2748  /// Compute a vector splat for a given element value.
2749  Value *getVectorSplat(Value *V, unsigned NumElements) {
2750  V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
2751  LLVM_DEBUG(dbgs() << " splat: " << *V << "\n");
2752  return V;
2753  }
2754 
2755  bool visitMemSetInst(MemSetInst &II) {
2756  LLVM_DEBUG(dbgs() << " original: " << II << "\n");
2757  assert(II.getRawDest() == OldPtr);
2758 
2759  AAMDNodes AATags;
2760  II.getAAMetadata(AATags);
2761 
2762  // If the memset has a variable size, it cannot be split; just adjust the
2763  // pointer to the new alloca.
2764  if (!isa<Constant>(II.getLength())) {
2765  assert(!IsSplit);
2766  assert(NewBeginOffset == BeginOffset);
2767  II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
2768  II.setDestAlignment(getSliceAlign());
2769 
2770  deleteIfTriviallyDead(OldPtr);
2771  return false;
2772  }
2773 
2774  // Record this instruction for deletion.
2775  Pass.DeadInsts.insert(&II);
2776 
2777  Type *AllocaTy = NewAI.getAllocatedType();
2778  Type *ScalarTy = AllocaTy->getScalarType();
2779 
2780  const bool CanContinue = [&]() {
2781  if (VecTy || IntTy)
2782  return true;
2783  if (BeginOffset > NewAllocaBeginOffset ||
2784  EndOffset < NewAllocaEndOffset)
2785  return false;
2786  auto *C = cast<ConstantInt>(II.getLength());
2787  if (C->getBitWidth() > 64)
2788  return false;
2789  const auto Len = C->getZExtValue();
2790  auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
2791  auto *SrcTy = VectorType::get(Int8Ty, Len);
2792  return canConvertValue(DL, SrcTy, AllocaTy) &&
2793  DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy));
2794  }();
2795 
2796  // If this doesn't map cleanly onto the alloca type, and that type isn't
2797  // a single value type, just emit a memset.
2798  if (!CanContinue) {
2799  Type *SizeTy = II.getLength()->getType();
2800  Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
2801  CallInst *New = IRB.CreateMemSet(
2802  getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
2803  getSliceAlign(), II.isVolatile());
2804  if (AATags)
2805  New->setAAMetadata(AATags);
2806  LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
2807  return false;
2808  }
2809 
2810  // If we can represent this as a simple value, we have to build the actual
2811  // value to store, which requires expanding the byte present in memset to
2812  // a sensible representation for the alloca type. This is essentially
2813  // splatting the byte to a sufficiently wide integer, splatting it across
2814  // any desired vector width, and bitcasting to the final type.
2815  Value *V;
2816 
2817  if (VecTy) {
2818  // If this is a memset of a vectorized alloca, insert it.
2819  assert(ElementTy == ScalarTy);
2820 
2821  unsigned BeginIndex = getIndex(NewBeginOffset);
2822  unsigned EndIndex = getIndex(NewEndOffset);
2823  assert(EndIndex > BeginIndex && "Empty vector!");
2824  unsigned NumElements = EndIndex - BeginIndex;
2825  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2826 
2827  Value *Splat =
2828  getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ElementTy) / 8);
2829  Splat = convertValue(DL, IRB, Splat, ElementTy);
2830  if (NumElements > 1)
2831  Splat = getVectorSplat(Splat, NumElements);
2832 
2833  Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2834  NewAI.getAlignment(), "oldload");
2835  V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
2836  } else if (IntTy) {
2837  // If this is a memset on an alloca where we can widen stores, insert the
2838  // set integer.
2839  assert(!II.isVolatile());
2840 
2841  uint64_t Size = NewEndOffset - NewBeginOffset;
2842  V = getIntegerSplat(II.getValue(), Size);
2843 
2844  if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
2845  EndOffset != NewAllocaEndOffset)) {
2846  Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2847  NewAI.getAlignment(), "oldload");
2848  Old = convertValue(DL, IRB, Old, IntTy);
2849  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2850  V = insertInteger(DL, IRB, Old, V, Offset, "insert");
2851  } else {
2852  assert(V->getType() == IntTy &&
2853  "Wrong type for an alloca wide integer!");
2854  }
2855  V = convertValue(DL, IRB, V, AllocaTy);
2856  } else {
2857  // Established these invariants above.
2858  assert(NewBeginOffset == NewAllocaBeginOffset);
2859  assert(NewEndOffset == NewAllocaEndOffset);
2860 
2861  V = getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ScalarTy) / 8);
2862  if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
2863  V = getVectorSplat(V, AllocaVecTy->getNumElements());
2864 
2865  V = convertValue(DL, IRB, V, AllocaTy);
2866  }
2867 
2868  StoreInst *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
2869  II.isVolatile());
2870  if (AATags)
2871  New->setAAMetadata(AATags);
2872  LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
2873  return !II.isVolatile();
2874  }
2875 
2876  bool visitMemTransferInst(MemTransferInst &II) {
2877  // Rewriting of memory transfer instructions can be a bit tricky. We break
2878  // them into two categories: split intrinsics and unsplit intrinsics.
2879 
2880  LLVM_DEBUG(dbgs() << " original: " << II << "\n");
2881 
2882  AAMDNodes AATags;
2883  II.getAAMetadata(AATags);
2884 
2885  bool IsDest = &II.getRawDestUse() == OldUse;
2886  assert((IsDest && II.getRawDest() == OldPtr) ||
2887  (!IsDest && II.getRawSource() == OldPtr));
2888 
2889  unsigned SliceAlign = getSliceAlign();
2890 
2891  // For unsplit intrinsics, we simply modify the source and destination
2892  // pointers in place. This isn't just an optimization, it is a matter of
2893  // correctness. With unsplit intrinsics we may be dealing with transfers
2894  // within a single alloca before SROA ran, or with transfers that have
2895  // a variable length. We may also be dealing with memmove instead of
2896  // memcpy, and so simply updating the pointers is all that is necessary to
2897  // update both source and dest of a single call.
2898  if (!IsSplittable) {
2899  Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
2900  if (IsDest) {
2901  II.setDest(AdjustedPtr);
2902  II.setDestAlignment(SliceAlign);
2903  }
2904  else {
2905  II.setSource(AdjustedPtr);
2906  II.setSourceAlignment(SliceAlign);
2907  }
2908 
2909  LLVM_DEBUG(dbgs() << " to: " << II << "\n");
2910  deleteIfTriviallyDead(OldPtr);
2911  return false;
2912  }
2913  // For split transfer intrinsics we have an incredibly useful assurance:
2914  // the source and destination do not reside within the same alloca, and at
2915  // least one of them does not escape. This means that we can replace
2916  // memmove with memcpy, and we don't need to worry about all manner of
2917  // downsides to splitting and transforming the operations.
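  // For illustration, a splittable memmove that copies bytes out of this
  // alloca into an unrelated buffer can safely become a memcpy (or the plain
  // load/store pair emitted further below), because the two ends are
  // guaranteed not to overlap.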
2918 
2919  // If this doesn't map cleanly onto the alloca type, and that type isn't
2920  // a single value type, just emit a memcpy.
2921  bool EmitMemCpy =
2922  !VecTy && !IntTy &&
2923  (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
2924  SliceSize != DL.getTypeStoreSize(NewAI.getAllocatedType()) ||
2925  !NewAI.getAllocatedType()->isSingleValueType());
2926 
2927  // If we're just going to emit a memcpy, and the alloca hasn't changed, and the
2928  // size hasn't been shrunk based on analysis of the viable range, this is
2929  // a no-op.
2930  if (EmitMemCpy && &OldAI == &NewAI) {
2931  // Ensure the start lines up.
2932  assert(NewBeginOffset == BeginOffset);
2933 
2934  // Rewrite the size as needed.
2935  if (NewEndOffset != EndOffset)
2936  II.setLength(ConstantInt::get(II.getLength()->getType(),
2937  NewEndOffset - NewBeginOffset));
2938  return false;
2939  }
2940  // Record this instruction for deletion.
2941  Pass.DeadInsts.insert(&II);
2942 
2943  // Strip all inbounds GEPs and pointer casts to try to dig out any root
2944  // alloca that should be re-examined after rewriting this instruction.
2945  Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
2946  if (AllocaInst *AI =
2947  dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
2948  assert(AI != &OldAI && AI != &NewAI &&
2949  "Splittable transfers cannot reach the same alloca on both ends.");
2950  Pass.Worklist.insert(AI);
2951  }
2952 
2953  Type *OtherPtrTy = OtherPtr->getType();
2954  unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
2955 
2956  // Compute the relative offset for the other pointer within the transfer.
2957  unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
2958  APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
2959  unsigned OtherAlign =
2960  IsDest ? II.getSourceAlignment() : II.getDestAlignment();
2961  OtherAlign = MinAlign(OtherAlign ? OtherAlign : 1,
2962  OtherOffset.zextOrTrunc(64).getZExtValue());
2963 
2964  if (EmitMemCpy) {
2965  // Compute the other pointer, folding as much as possible to produce
2966  // a single, simple GEP in most cases.
2967  OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
2968  OtherPtr->getName() + ".");
2969 
2970  Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
2971  Type *SizeTy = II.getLength()->getType();
2972  Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
2973 
2974  Value *DestPtr, *SrcPtr;
2975  unsigned DestAlign, SrcAlign;
2976  // Note: IsDest is true iff we're copying into the new alloca slice
2977  if (IsDest) {
2978  DestPtr = OurPtr;
2979  DestAlign = SliceAlign;
2980  SrcPtr = OtherPtr;
2981  SrcAlign = OtherAlign;
2982  } else {
2983  DestPtr = OtherPtr;
2984  DestAlign = OtherAlign;
2985  SrcPtr = OurPtr;
2986  SrcAlign = SliceAlign;
2987  }
2988  CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
2989  Size, II.isVolatile());
2990  if (AATags)
2991  New->setAAMetadata(AATags);
2992  LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
2993  return false;
2994  }
2995 
2996  bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
2997  NewEndOffset == NewAllocaEndOffset;
2998  uint64_t Size = NewEndOffset - NewBeginOffset;
2999  unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3000  unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3001  unsigned NumElements = EndIndex - BeginIndex;
3002  IntegerType *SubIntTy =
3003  IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
3004 
3005  // Reset the other pointer type to match the register type we're going to
3006  // use, but using the address space of the original other pointer.
3007  Type *OtherTy;
3008  if (VecTy && !IsWholeAlloca) {
3009  if (NumElements == 1)
3010  OtherTy = VecTy->getElementType();
3011  else
3012  OtherTy = VectorType::get(VecTy->getElementType(), NumElements);
3013  } else if (IntTy && !IsWholeAlloca) {
3014  OtherTy = SubIntTy;
3015  } else {
3016  OtherTy = NewAllocaTy;
3017  }
3018  OtherPtrTy = OtherTy->getPointerTo(OtherAS);
3019 
3020  Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3021  OtherPtr->getName() + ".");
3022  unsigned SrcAlign = OtherAlign;
3023  Value *DstPtr = &NewAI;
3024  unsigned DstAlign = SliceAlign;
3025  if (!IsDest) {
3026  std::swap(SrcPtr, DstPtr);
3027  std::swap(SrcAlign, DstAlign);
3028  }
3029 
3030  Value *Src;
3031  if (VecTy && !IsWholeAlloca && !IsDest) {
3032  Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3033  NewAI.getAlignment(), "load");
3034  Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
3035  } else if (IntTy && !IsWholeAlloca && !IsDest) {
3036  Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3037  NewAI.getAlignment(), "load");
3038  Src = convertValue(DL, IRB, Src, IntTy);
3039  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3040  Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
3041  } else {
3042  LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3043  II.isVolatile(), "copyload");
3044  if (AATags)
3045  Load->setAAMetadata(AATags);
3046  Src = Load;
3047  }
3048 
3049  if (VecTy && !IsWholeAlloca && IsDest) {
3050  Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3051  NewAI.getAlignment(), "oldload");
3052  Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
3053  } else if (IntTy && !IsWholeAlloca && IsDest) {
3054  Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3055  NewAI.getAlignment(), "oldload");
3056  Old = convertValue(DL, IRB, Old, IntTy);
3057  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3058  Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
3059  Src = convertValue(DL, IRB, Src, NewAllocaTy);
3060  }
3061 
3062  StoreInst *Store = cast<StoreInst>(
3063  IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
3064  if (AATags)
3065  Store->setAAMetadata(AATags);
3066  LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3067  return !II.isVolatile();
3068  }
3069 
3070  bool visitIntrinsicInst(IntrinsicInst &II) {
3071  assert(II.isLifetimeStartOrEnd());
3072  LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3073  assert(II.getArgOperand(1) == OldPtr);
3074 
3075  // Record this instruction for deletion.
3076  Pass.DeadInsts.insert(&II);
3077 
3078  // Lifetime intrinsics are only promotable if they cover the whole alloca.
3079  // Therefore, we drop lifetime intrinsics which don't cover the whole
3080  // alloca.
3081  // (In theory, intrinsics which partially cover an alloca could be
3082  // promoted, but PromoteMemToReg doesn't handle that case.)
3083  // FIXME: Check whether the alloca is promotable before dropping the
3084  // lifetime intrinsics?
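  // For illustration, on a 16-byte slice a whole-alloca marker such as
  //   call void @llvm.lifetime.start.p0i8(i64 16, i8* %ptr)
  // is rebuilt below with the slice's size and an i8* into the new alloca,
  // while a marker covering only part of the slice is dropped by the early
  // return here.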
3085  if (NewBeginOffset != NewAllocaBeginOffset ||
3086  NewEndOffset != NewAllocaEndOffset)
3087  return true;
3088 
3089  ConstantInt *Size =
3090  ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
3091  NewEndOffset - NewBeginOffset);
3092  // Lifetime intrinsics always expect an i8* so directly get such a pointer
3093  // for the new alloca slice.
3094  Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace());
3095  Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
3096  Value *New;
3097  if (II.getIntrinsicID() == Intrinsic::lifetime_start)
3098  New = IRB.CreateLifetimeStart(Ptr, Size);
3099  else
3100  New = IRB.CreateLifetimeEnd(Ptr, Size);
3101 
3102  (void)New;
3103  LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3104 
3105  return true;
3106  }
3107 
3108  void fixLoadStoreAlign(Instruction &Root) {
3109  // This algorithm implements the same visitor loop as
3110  // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
3111  // or store found.
3112  SmallPtrSet<Instruction *, 4> Visited;
3113  SmallVector<Instruction *, 4> Uses;
3114  Visited.insert(&Root);
3115  Uses.push_back(&Root);
3116  do {
3117  Instruction *I = Uses.pop_back_val();
3118 
3119  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
3120  unsigned LoadAlign = LI->getAlignment();
3121  if (!LoadAlign)
3122  LoadAlign = DL.getABITypeAlignment(LI->getType());
3123  LI->setAlignment(MaybeAlign(std::min(LoadAlign, getSliceAlign())));
3124  continue;
3125  }
3126  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
3127  unsigned StoreAlign = SI->getAlignment();
3128  if (!StoreAlign) {
3129  Value *Op = SI->getOperand(0);
3130  StoreAlign = DL.getABITypeAlignment(Op->getType());
3131  }
3132  SI->setAlignment(MaybeAlign(std::min(StoreAlign, getSliceAlign())));
3133  continue;
3134  }
3135 
3136  assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
3137  isa<PHINode>(I) || isa<SelectInst>(I) ||
3138  isa<GetElementPtrInst>(I));
3139  for (User *U : I->users())
3140  if (Visited.insert(cast<Instruction>(U)).second)
3141  Uses.push_back(cast<Instruction>(U));
3142  } while (!Uses.empty());
3143  }
3144 
3145  bool visitPHINode(PHINode &PN) {
3146  LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
3147  assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
3148  assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
3149 
3150  // We would like to compute a new pointer in only one place, but have it be
3151  // as local as possible to the PHI. To do that, we re-use the location of
3152  // the old pointer, which necessarily must be in the right position to
3153  // dominate the PHI.
3154  IRBuilderTy PtrBuilder(IRB);
3155  if (isa<PHINode>(OldPtr))
3156  PtrBuilder.SetInsertPoint(&*OldPtr->getParent()->getFirstInsertionPt());
3157  else
3158  PtrBuilder.SetInsertPoint(OldPtr);
3159  PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
3160 
3161  Value *NewPtr = getNewAllocaSlicePtr(PtrBuilder, OldPtr->getType());
3162  // Replace the operands which were using the old pointer.
3163  std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
3164 
3165  LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
3166  deleteIfTriviallyDead(OldPtr);
3167 
3168  // Fix the alignment of any loads or stores using this PHI node.
3169  fixLoadStoreAlign(PN);
3170 
3171  // PHIs can't be promoted on their own, but often can be speculated. We
3172  // check the speculation outside of the rewriter so that we see the
3173  // fully-rewritten alloca.
3174  PHIUsers.insert(&PN);
3175  return true;
3176  }
3177 
3178  bool visitSelectInst(SelectInst &SI) {
3179  LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3180  assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
3181  "Pointer isn't an operand!");
3182  assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
3183  assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
3184 
3185  Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3186  // Replace the operands which were using the old pointer.
3187  if (SI.getOperand(1) == OldPtr)
3188  SI.setOperand(1, NewPtr);
3189  if (SI.getOperand(2) == OldPtr)
3190  SI.setOperand(2, NewPtr);
3191 
3192  LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
3193  deleteIfTriviallyDead(OldPtr);
3194 
3195  // Fix the alignment of any loads or stores using this select.
3196  fixLoadStoreAlign(SI);
3197 
3198  // Selects can't be promoted on their own, but often can be speculated. We
3199  // check the speculation outside of the rewriter so that we see the
3200  // fully-rewritten alloca.
3201  SelectUsers.insert(&SI);
3202  return true;
3203  }
3204 };
3205 
3206 namespace {
3207 
3208 /// Visitor to rewrite aggregate loads and stores as scalar.
3209 ///
3210 /// This pass aggressively rewrites all aggregate loads and stores on
3211 /// a particular pointer (or any pointer derived from it which we can identify)
3212 /// with scalar loads and stores.
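/// For illustration (the value names here are made up), a first-class
/// aggregate load such as
///   %agg = load { i32, float }, { i32, float }* %p
/// becomes, roughly, a GEP, a scalar load, and an insertvalue per element:
///   %p.0 = getelementptr inbounds { i32, float }, { i32, float }* %p, i32 0, i32 0
///   %v.0 = load i32, i32* %p.0
///   %agg.0 = insertvalue { i32, float } undef, i32 %v.0, 0
/// and similarly for the second element.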
3213 class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
3214  // Befriend the base class so it can delegate to private visit methods.
3215  friend class InstVisitor<AggLoadStoreRewriter, bool>;
3216 
3217  /// Queue of pointer uses to analyze and potentially rewrite.
3218  SmallVector<Use *, 8> Queue;
3219 
3220  /// Set to prevent us from cycling with phi nodes and loops.
3221  SmallPtrSet<User *, 8> Visited;
3222 
3223  /// The current pointer use being rewritten. This is used to dig up the used
3224  /// value (as opposed to the user).
3225  Use *U;
3226 
3227  /// Used to calculate offsets, and hence alignment, of subobjects.
3228  const DataLayout &DL;
3229 
3230 public:
3231  AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {}
3232 
3233  /// Rewrite loads and stores through a pointer and all pointers derived from
3234  /// it.
3235  bool rewrite(Instruction &I) {
3236  LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
3237  enqueueUsers(I);
3238  bool Changed = false;
3239  while (!Queue.empty()) {
3240  U = Queue.pop_back_val();
3241  Changed |= visit(cast<Instruction>(U->getUser()));
3242  }
3243  return Changed;
3244  }
3245 
3246 private:
3247  /// Enqueue all the users of the given instruction for further processing.
3248  /// This uses a set to de-duplicate users.
3249  void enqueueUsers(Instruction &I) {
3250  for (Use &U : I.uses())
3251  if (Visited.insert(U.getUser()).second)
3252  Queue.push_back(&U);
3253  }
3254 
3255  // Conservative default is to not rewrite anything.
3256  bool visitInstruction(Instruction &I) { return false; }
3257 
3258  /// Generic recursive split emission class.
3259  template <typename Derived> class OpSplitter {
3260  protected:
3261  /// The builder used to form new instructions.
3262  IRBuilderTy IRB;
3263 
3264  /// The indices to be used with insert- or extractvalue to select the
3265  /// appropriate value within the aggregate.
3266  SmallVector<unsigned, 4> Indices;
3267 
3268  /// The indices to a GEP instruction which will move Ptr to the correct slot
3269  /// within the aggregate.
3270  SmallVector<Value *, 4> GEPIndices;
3271 
3272  /// The base pointer of the original op, used as a base for GEPing the
3273  /// split operations.
3274  Value *Ptr;
3275 
3276  /// The base pointee type being GEPed into.
3277  Type *BaseTy;
3278 
3279  /// Known alignment of the base pointer.
3280  unsigned BaseAlign;
3281 
3282  /// Used to calculate the offset of each component so we can correctly
3283  /// deduce alignments.
3284  const DataLayout &DL;
3285 
3286  /// Initialize the splitter with an insertion point, Ptr and start with a
3287  /// single zero GEP index.
3288  OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3289  unsigned BaseAlign, const DataLayout &DL)
3290  : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr),
3291  BaseTy(BaseTy), BaseAlign(BaseAlign), DL(DL) {}
3292 
3293  public:
3294  /// Generic recursive split emission routine.
3295  ///
3296  /// This method recursively splits an aggregate op (load or store) into
3297  /// scalar or vector ops. It splits recursively until it hits a single value
3298  /// and emits that single value operation via the template argument.
3299  ///
3300  /// The logic of this routine relies on GEPs and insertvalue and
3301  /// extractvalue all operating with the same fundamental index list, merely
3302  /// formatted differently (GEPs need actual values).
3303  ///
3304  /// \param Ty The type being split recursively into smaller ops.
3305  /// \param Agg The aggregate value being built up or stored, depending on
3306  /// whether this is splitting a load or a store respectively.
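  /// For illustration, splitting an op on [2 x { i32, i32 }] visits the index
  /// paths (0,0), (0,1), (1,0), (1,1); at each leaf the accumulated index list
  /// is used both as GEP indices (as i32 constants) and as the indices of the
  /// matching extractvalue or insertvalue.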
3307  void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
3308  if (Ty->isSingleValueType()) {
3309  unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
3310  return static_cast<Derived *>(this)->emitFunc(
3311  Ty, Agg, MinAlign(BaseAlign, Offset), Name);
3312  }
3313 
3314  if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
3315  unsigned OldSize = Indices.size();
3316  (void)OldSize;
3317  for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
3318  ++Idx) {
3319  assert(Indices.size() == OldSize && "Did not return to the old size");
3320  Indices.push_back(Idx);
3321  GEPIndices.push_back(IRB.getInt32(Idx));
3322  emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
3323  GEPIndices.pop_back();
3324  Indices.pop_back();
3325  }
3326  return;
3327  }
3328 
3329  if (StructType *STy = dyn_cast<StructType>(Ty)) {
3330  unsigned OldSize = Indices.size();
3331  (void)OldSize;
3332  for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
3333  ++Idx) {
3334  assert(Indices.size() == OldSize && "Did not return to the old size");
3335  Indices.push_back(Idx);
3336  GEPIndices.push_back(IRB.getInt32(Idx));
3337  emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
3338  GEPIndices.pop_back();
3339  Indices.pop_back();
3340  }
3341  return;
3342  }
3343 
3344  llvm_unreachable("Only arrays and structs are aggregate loadable types");
3345  }
3346  };
3347 
3348  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
3349  AAMDNodes AATags;
3350 
3351  LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3352  AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
3353  : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
3354  DL), AATags(AATags) {}
3355 
3356  /// Emit a leaf load of a single value. This is called at the leaves of the
3357  /// recursive emission to actually load values.
3358  void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
3359  assert(Ty->isSingleValueType());
3360  // Load the single value and insert it using the indices.
3361  Value *GEP =
3362  IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
3363  LoadInst *Load = IRB.CreateAlignedLoad(Ty, GEP, Align, Name + ".load");
3364  if (AATags)
3365  Load->setAAMetadata(AATags);
3366  Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
3367  LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
3368  }
3369  };
3370 
3371  bool visitLoadInst(LoadInst &LI) {
3372  assert(LI.getPointerOperand() == *U);
3373  if (!LI.isSimple() || LI.getType()->isSingleValueType())
3374  return false;
3375 
3376  // We have an aggregate being loaded, split it apart.
3377  LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
3378  AAMDNodes AATags;
3379  LI.getAAMetadata(AATags);
3380  LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags,
3381  getAdjustedAlignment(&LI, 0, DL), DL);
3382  Value *V = UndefValue::get(LI.getType());
3383  Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
3384  LI.replaceAllUsesWith(V);
3385  LI.eraseFromParent();
3386  return true;
3387  }
3388 
3389  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
3390  StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3391  AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
3392  : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
3393  DL),
3394  AATags(AATags) {}
3395  AAMDNodes AATags;
3396  /// Emit a leaf store of a single value. This is called at the leaves of the
3397  /// recursive emission to actually produce stores.
3398  void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
3399  assert(Ty->isSingleValueType());
3400  // Extract the single value and store it using the indices.
3401  //
3402  // The gep and extractvalue values are factored out of the CreateStore
3403  // call to make the output independent of the argument evaluation order.
3404  Value *ExtractValue =
3405  IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
3406  Value *InBoundsGEP =
3407  IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
3408  StoreInst *Store =
3409  IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Align);
3410  if (AATags)
3411  Store->setAAMetadata(AATags);
3412  LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3413  }
3414  };
3415 
3416  bool visitStoreInst(StoreInst &SI) {
3417  if (!SI.isSimple() || SI.getPointerOperand() != *U)
3418  return false;
3419  Value *V = SI.getValueOperand();
3420  if (V->getType()->isSingleValueType())
3421  return false;
3422 
3423  // We have an aggregate being stored, split it apart.
3424  LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3425  AAMDNodes AATags;
3426  SI.getAAMetadata(AATags);
3427  StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags,
3428  getAdjustedAlignment(&SI, 0, DL), DL);
3429  Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
3430  SI.eraseFromParent();
3431  return true;
3432  }
3433 
3434  bool visitBitCastInst(BitCastInst &BC) {
3435  enqueueUsers(BC);
3436  return false;
3437  }
3438 
3439  bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
3440  enqueueUsers(ASC);
3441  return false;
3442  }
3443 
3444  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
3445  enqueueUsers(GEPI);
3446  return false;
3447  }
3448 
3449  bool visitPHINode(PHINode &PN) {
3450  enqueueUsers(PN);
3451  return false;
3452  }
3453 
3454  bool visitSelectInst(SelectInst &SI) {
3455  enqueueUsers(SI);
3456  return false;
3457  }
3458 };
3459 
3460 } // end anonymous namespace
3461 
3462 /// Strip aggregate type wrapping.
3463 ///
3464 /// This removes no-op aggregate types wrapping an underlying type. It will
3465 /// strip as many layers of types as it can without changing either the type
3466 /// size or the allocated size.
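/// For example, assuming a typical data layout, { { float } } and [1 x float]
/// both strip down to float, while { float, i32 } is returned unchanged
/// because the inner float does not span the whole allocation.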
3467 static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
3468  if (Ty->isSingleValueType())
3469  return Ty;
3470 
3471  uint64_t AllocSize = DL.getTypeAllocSize(Ty);
3472  uint64_t TypeSize = DL.getTypeSizeInBits(Ty);
3473 
3474  Type *InnerTy;
3475  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
3476  InnerTy = ArrTy->getElementType();
3477  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
3478  const StructLayout *SL = DL.getStructLayout(STy);
3479  unsigned Index = SL->getElementContainingOffset(0);
3480  InnerTy = STy->getElementType(Index);
3481  } else {
3482  return Ty;
3483  }
3484 
3485  if (AllocSize > DL.getTypeAllocSize(InnerTy) ||
3486  TypeSize > DL.getTypeSizeInBits(InnerTy))
3487  return Ty;
3488 
3489  return stripAggregateTypeWrapping(DL, InnerTy);
3490 }
3491 
3492 /// Try to find a partition of the aggregate type passed in for a given
3493 /// offset and size.
3494 ///
3495 /// This recurses through the aggregate type and tries to compute a subtype
3496 /// based on the offset and size. When the offset and size span a sub-section
3497 /// of an array, it will even compute a new array type for that sub-section,
3498 /// and the same for structs.
3499 ///
3500 /// Note that this routine is very strict and tries to find a partition of the
3501 /// type which produces the *exact* right offset and size. It is not forgiving
3502 /// when the size or offset causes either end of the type-based partition to be off.
3503 /// Also, this is a best-effort routine. It is reasonable to give up and not
3504 /// return a type if necessary.
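/// For example, with a typical data layout, offset 4 and size 8 within
/// { i32, { i32, i32 }, i64 } yields the inner { i32, i32 }, and offset 4
/// with size 4 within [12 x i8] yields [4 x i8]; a request that ends in the
/// middle of an element yields nullptr.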
3505 static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
3506  uint64_t Size) {
3507  if (Offset == 0 && DL.getTypeAllocSize(Ty) == Size)
3508  return stripAggregateTypeWrapping(DL, Ty);
3509  if (Offset > DL.getTypeAllocSize(Ty) ||
3510  (DL.getTypeAllocSize(Ty) - Offset) < Size)
3511  return nullptr;
3512 
3513  if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
3514  Type *ElementTy = SeqTy->getElementType();
3515  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
3516  uint64_t NumSkippedElements = Offset / ElementSize;
3517  if (NumSkippedElements >= SeqTy->getNumElements())
3518  return nullptr;
3519  Offset -= NumSkippedElements * ElementSize;
3520 
3521  // First check if we need to recurse.
3522  if (Offset > 0 || Size < ElementSize) {
3523  // Bail if the partition ends in a different array element.
3524  if ((Offset + Size) > ElementSize)
3525  return nullptr;
3526  // Recurse through the element type trying to peel off offset bytes.
3527  return getTypePartition(DL, ElementTy, Offset, Size);
3528  }
3529  assert(Offset == 0);
3530 
3531  if (Size == ElementSize)
3532  return stripAggregateTypeWrapping(DL, ElementTy);
3533  assert(Size > ElementSize);
3534  uint64_t NumElements = Size / ElementSize;
3535  if (NumElements * ElementSize != Size)
3536  return nullptr;
3537  return ArrayType::get(ElementTy, NumElements);
3538  }
3539 
3540  StructType *STy = dyn_cast<StructType>(Ty);
3541  if (!STy)
3542  return nullptr;
3543 
3544  const StructLayout *SL = DL.getStructLayout(STy);
3545  if (Offset >= SL->getSizeInBytes())
3546  return nullptr;
3547  uint64_t EndOffset = Offset + Size;
3548  if (EndOffset > SL->getSizeInBytes())
3549  return nullptr;
3550 
3551  unsigned Index = SL->getElementContainingOffset(Offset);
3552  Offset -= SL->getElementOffset(Index);
3553 
3554  Type *ElementTy = STy->getElementType(Index);
3555  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
3556  if (Offset >= ElementSize)
3557  return nullptr; // The offset points into alignment padding.
3558 
3559  // See if any partition must be contained by the element.
3560  if (Offset > 0 || Size < ElementSize) {
3561  if ((Offset + Size) > ElementSize)
3562  return nullptr;
3563  return getTypePartition(DL, ElementTy, Offset, Size);
3564  }
3565  assert(Offset == 0);
3566 
3567  if (Size == ElementSize)
3568  return stripAggregateTypeWrapping(DL, ElementTy);
3569 
3569 
3570  StructType::element_iterator EI = STy->element_begin() + Index,
3571  EE = STy->element_end();
3572  if (EndOffset < SL->getSizeInBytes()) {
3573  unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
3574  if (Index == EndIndex)
3575  return nullptr; // Within a single element and its padding.
3576 
3577  // Don't try to form "natural" types if the elements don't line up with the
3578  // expected size.
3579  // FIXME: We could potentially recurse down through the last element in the
3580  // sub-struct to find a natural end point.
3581  if (SL->getElementOffset(EndIndex) != EndOffset)
3582  return nullptr;
3583 
3584  assert(Index < EndIndex);
3585  EE = STy->element_begin() + EndIndex;
3586  }
3587 
3588  // Try to build up a sub-structure.
3589  StructType *SubTy =
3590  StructType::get(STy->getContext(), makeArrayRef(EI, EE), STy->isPacked());
3591  const StructLayout *SubSL = DL.getStructLayout(SubTy);
3592  if (Size != SubSL->getSizeInBytes())
3593  return nullptr; // The sub-struct doesn't have quite the size needed.
3594 
3595  return SubTy;
3596 }
3597 
3598 /// Pre-split loads and stores to simplify rewriting.
3599 ///
3600 /// We want to break up the splittable load+store pairs as much as
3601 /// possible. This is important to do as a preprocessing step, as once we
3602 /// start rewriting the accesses to partitions of the alloca we lose the
3603 /// necessary information to correctly split apart paired loads and stores
3604 /// which both point into this alloca. The case to consider is something like
3605 /// the following:
3606 ///
3607 /// %a = alloca [12 x i8]
3608 /// %gep1 = getelementptr [12 x i8]* %a, i32 0, i32 0
3609 /// %gep2 = getelementptr [12 x i8]* %a, i32 0, i32 4
3610 /// %gep3 = getelementptr [12 x i8]* %a, i32 0, i32 8
3611 /// %iptr1 = bitcast i8* %gep1 to i64*
3612 /// %iptr2 = bitcast i8* %gep2 to i64*
3613 /// %fptr1 = bitcast i8* %gep1 to float*
3614 /// %fptr2 = bitcast i8* %gep2 to float*
3615 /// %fptr3 = bitcast i8* %gep3 to float*
3616 /// store float 0.0, float* %fptr1
3617 /// store float 1.0, float* %fptr2
3618 /// %v = load i64* %iptr1
3619 /// store i64 %v, i64* %iptr2
3620 /// %f1 = load float* %fptr2
3621 /// %f2 = load float* %fptr3
3622 ///
3623 /// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
3624 /// promote everything so we recover the 2 SSA values that should have been
3625 /// there all along.
3626 ///
3627 /// \returns true if any changes are made.
3628 bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
3629  LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
3630 
3631  // Track the loads and stores which are candidates for pre-splitting here, in
3632  // the order they first appear during the partition scan. These give stable
3633  // iteration order and a basis for tracking which loads and stores we
3634  // actually split.
3635  SmallVector<LoadInst *, 4> Loads;
3636  SmallVector<StoreInst *, 4> Stores;
3637 
3638  // We need to accumulate the splits required of each load or store where we
3639  // can find them via a direct lookup. This is important to cross-check loads
3640  // and stores against each other. We also track the slice so that we can kill
3641  // all the slices that end up split.
3642  struct SplitOffsets {
3643  Slice *S;
3644  std::vector<uint64_t> Splits;
3645  };
3646  SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
3647 
3648  // Track loads out of this alloca which cannot, for any reason, be pre-split.
3649  // This is important as we also cannot pre-split stores of those loads!
3650  // FIXME: This is all pretty gross. It means that we can be more aggressive
3651  // in pre-splitting when the load feeding the store happens to come from
3652  // a separate alloca. Put another way, the effectiveness of SROA would be
3653  // decreased by a frontend which just concatenated all of its local allocas
3654  // into one big flat alloca. But defeating such patterns is exactly the job
3655  // SROA is tasked with! Sadly, to not have this discrepancy we would have
3656  // to change store pre-splitting to actually force pre-splitting of the load
3657  // that feeds it *and all stores*. That makes pre-splitting much harder, but
3658  // maybe it would make it more principled?
3659  SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
3660 
3661  LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
3662  for (auto &P : AS.partitions()) {
3663  for (Slice &S : P) {
3664  Instruction *I = cast<Instruction>(S.getUse()->getUser());
3665  if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
3666  // If this is a load we have to track that it can't participate in any
3667  // pre-splitting. If this is a store of a load we have to track that
3668  // that load also can't participate in any pre-splitting.
3669  if (auto *LI = dyn_cast<LoadInst>(I))
3670  UnsplittableLoads.insert(LI);
3671  else if (auto *SI = dyn_cast<StoreInst>(I))
3672  if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
3673  UnsplittableLoads.insert(LI);
3674  continue;
3675  }
3676  assert(P.endOffset() > S.beginOffset() &&
3677  "Empty or backwards partition!");
3678 
3679  // Determine if this is a pre-splittable slice.
3680  if (auto *LI = dyn_cast<LoadInst>(I)) {
3681  assert(!LI->isVolatile() && "Cannot split volatile loads!");
3682 
3683  // The load must be used exclusively to store into other pointers for
3684  // us to be able to arbitrarily pre-split it. The stores must also be
3685  // simple to avoid changing semantics.
3686  auto IsLoadSimplyStored = [](LoadInst *LI) {
3687  for (User *LU : LI->users()) {
3688  auto *SI = dyn_cast<StoreInst>(LU);
3689  if (!SI || !SI->isSimple())
3690  return false;
3691  }
3692  return true;
3693  };
3694  if (!IsLoadSimplyStored(LI)) {
3695  UnsplittableLoads.insert(LI);
3696  continue;
3697  }
3698 
3699  Loads.push_back(LI);
3700  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
3701  if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
3702  // Skip stores *of* pointers. FIXME: This shouldn't even be possible!
3703  continue;
3704  auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
3705  if (!StoredLoad || !StoredLoad->isSimple())
3706  continue;
3707  assert(!SI->isVolatile() && "Cannot split volatile stores!");
3708 
3709  Stores.push_back(SI);
3710  } else {
3711  // Other uses cannot be pre-split.
3712  continue;
3713  }
3714 
3715  // Record the initial split.
3716  LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n");
3717  auto &Offsets = SplitOffsetsMap[I];
3718  assert(Offsets.Splits.empty() &&
3719  "Should not have splits the first time we see an instruction!");
3720  Offsets.S = &S;
3721  Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
3722  }
3723 
3724  // Now scan the already split slices, and add a split for any of them which
3725  // we're going to pre-split.
3726  for (Slice *S : P.splitSliceTails()) {
3727  auto SplitOffsetsMapI =
3728  SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
3729  if (SplitOffsetsMapI == SplitOffsetsMap.end())
3730  continue;
3731  auto &Offsets = SplitOffsetsMapI->second;
3732 
3733  assert(Offsets.S == S && "Found a mismatched slice!");
3734  assert(!Offsets.Splits.empty() &&
3735  "Cannot have an empty set of splits on the second partition!");
3736  assert(Offsets.Splits.back() ==
3737  P.beginOffset() - Offsets.S->beginOffset() &&
3738  "Previous split does not end where this one begins!");
3739 
3740  // Record each split. The last partition's end isn't needed as the size
3741  // of the slice dictates that.
3742  if (S->endOffset() > P.endOffset())
3743  Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
3744  }
3745  }
3746 
3747  // We may have split loads where some of their stores are split stores. For
3748  // such loads and stores, we can only pre-split them if their splits exactly
3749  // match relative to their starting offset. We have to verify this prior to
3750  // any rewriting.
3751  Stores.erase(
3752  llvm::remove_if(Stores,
3753  [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
3754  // Lookup the load we are storing in our map of split
3755  // offsets.
3756  auto *LI = cast<LoadInst>(SI->getValueOperand());
3757  // If it was completely unsplittable, then we're done,
3758  // and this store can't be pre-split.
3759  if (UnsplittableLoads.count(LI))
3760  return true;
3761 
3762  auto LoadOffsetsI = SplitOffsetsMap.find(LI);
3763  if (LoadOffsetsI == SplitOffsetsMap.end())
3764  return false; // Unrelated loads are definitely safe.
3765  auto &LoadOffsets = LoadOffsetsI->second;
3766 
3767  // Now lookup the store's offsets.
3768  auto &StoreOffsets = SplitOffsetsMap[SI];
3769 
3770  // If the relative offsets of each split in the load and
3771  // store match exactly, then we can split them and we
3772  // don't need to remove them here.
3773  if (LoadOffsets.Splits == StoreOffsets.Splits)
3774  return false;
3775 
3776  LLVM_DEBUG(
3777  dbgs()
3778  << " Mismatched splits for load and store:\n"
3779  << " " << *LI << "\n"
3780  << " " << *SI << "\n");
3781 
3782  // We've found a store and load that we need to split
3783  // with mismatched relative splits. Just give up on them
3784  // and remove both instructions from our list of
3785  // candidates.
3786  UnsplittableLoads.insert(LI);
3787  return true;
3788  }),
3789  Stores.end());
3790  // Now we have to go *back* through all the stores, because a later store may
3791  // have caused an earlier store's load to become unsplittable and if it is
3792  // unsplittable for the later store, then we can't rely on it being split in
3793  // the earlier store either.
3794  Stores.erase(llvm::remove_if(Stores,
3795  [&UnsplittableLoads](StoreInst *SI) {
3796  auto *LI =
3797  cast<LoadInst>(SI->getValueOperand());
3798  return UnsplittableLoads.count(LI);
3799  }),
3800  Stores.end());
3801  // Once we've established all the loads that can't be split for some reason,
3802  // filter any that made it into our list out.
3803  Loads.erase(llvm::remove_if(Loads,
3804  [&UnsplittableLoads](LoadInst *LI) {
3805  return UnsplittableLoads.count(LI);
3806  }),
3807  Loads.end());
3808 
3809  // If no loads or stores are left, there is no pre-splitting to be done for
3810  // this alloca.
3811  if (Loads.empty() && Stores.empty())
3812  return false;
3813 
3814  // From here on, we can't fail and will be building new accesses, so rig up
3815  // an IR builder.
3816  IRBuilderTy IRB(&AI);
3817 
3818  // Collect the new slices which we will merge into the alloca slices.
3819  SmallVector<Slice, 4> NewSlices;
3820 
3821  // Track any allocas we end up splitting loads and stores for so we iterate
3822  // on them.
3823  SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
3824 
3825  // At this point, we have collected all of the loads and stores we can
3826  // pre-split, and the specific splits needed for them. We actually do the
3827  // splitting in a specific order so that we can handle the case where one
3828  // of the loads is the value operand of one of the stores.
3829  //
3830  // First, we rewrite all of the split loads, and just accumulate each split
3831  // load in a parallel structure. We also build the slices for them and append
3832  // them to the alloca slices.
3833  SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
3834  std::vector<LoadInst *> SplitLoads;
3835  const DataLayout &DL = AI.getModule()->getDataLayout();
3836  for (LoadInst *LI : Loads) {
3837  SplitLoads.clear();
3838 
3839  IntegerType *Ty = cast<IntegerType>(LI->getType());
3840  uint64_t LoadSize = Ty->getBitWidth() / 8;
3841  assert(LoadSize > 0 && "Cannot have a zero-sized integer load!");
3842 
3843  auto &Offsets = SplitOffsetsMap[LI];
3844  assert(LoadSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
3845  "Slice size should always match load size exactly!");
3846  uint64_t BaseOffset = Offsets.S->beginOffset();
3847  assert(BaseOffset + LoadSize > BaseOffset &&
3848  "Cannot represent alloca access size using 64-bit integers!");
3849 
3850  Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
3851  IRB.SetInsertPoint(LI);
3852 
3853  LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
3854 
3855  uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
3856  int Idx = 0, Size = Offsets.Splits.size();
3857  for (;;) {
3858  auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
3859  auto AS = LI->getPointerAddressSpace();
3860  auto *PartPtrTy = PartTy->getPointerTo(AS);
3861  LoadInst *PLoad = IRB.CreateAlignedLoad(
3862  PartTy,
3863  getAdjustedPtr(IRB, DL, BasePtr,
3864  APInt(DL.getIndexSizeInBits(AS), PartOffset),
3865  PartPtrTy, BasePtr->getName() + "."),
3866  getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
3867  LI->getName());
3868  PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
3869  LLVMContext::MD_access_group});
3870 
3871  // Append this load onto the list of split loads so we can find it later
3872  // to rewrite the stores.
3873  SplitLoads.push_back(PLoad);
3874 
3875  // Now build a new slice for the alloca.
3876  NewSlices.push_back(
3877  Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
3878  &PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
3879  /*IsSplittable*/ false));
3880  LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
3881  << ", " << NewSlices.back().endOffset()
3882  << "): " << *PLoad << "\n");
3883 
3884  // See if we've handled all the splits.
3885  if (Idx >= Size)
3886  break;
3887 
3888  // Setup the next partition.
3889  PartOffset = Offsets.Splits[Idx];
3890  ++Idx;
3891  PartSize = (Idx < Size ? Offsets.Splits[Idx] : LoadSize) - PartOffset;
3892  }
3893 
3894  // Now that we have the split loads, do the slow walk over all uses of the
3895  // load and rewrite them as split stores, or save the split loads to use
3896  // below if the store is going to be split there anyway.
3897  bool DeferredStores = false;
3898  for (User *LU : LI->users()) {
3899  StoreInst *SI = cast<StoreInst>(LU);
3900  if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
3901  DeferredStores = true;
3902  LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
3903  << "\n");
3904  continue;
3905  }
3906 
3907  Value *StoreBasePtr = SI->getPointerOperand();
3908  IRB.SetInsertPoint(SI);
3909 
3910  LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
3911 
3912  for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
3913  LoadInst *PLoad = SplitLoads[Idx];
3914  uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
3915  auto *PartPtrTy =
3916  PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
3917 
3918  auto AS = SI->getPointerAddressSpace();
3919  StoreInst *PStore = IRB.CreateAlignedStore(
3920  PLoad,
3921  getAdjustedPtr(IRB, DL, StoreBasePtr,
3922  APInt(DL.getIndexSizeInBits(AS), PartOffset),
3923  PartPtrTy, StoreBasePtr->getName() + "."),
3924  getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
3925  PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
3926  LLVMContext::MD_access_group});
3927  LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
3928  }
3929 
3930  // We want to immediately iterate on any allocas impacted by splitting
3931  // this store, and we have to track any promotable alloca (indicated by
3932  // a direct store) as needing to be resplit because it is no longer
3933  // promotable.
3934  if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
3935  ResplitPromotableAllocas.insert(OtherAI);
3936  Worklist.insert(OtherAI);
3937  } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
3938  StoreBasePtr->stripInBoundsOffsets())) {
3939  Worklist.insert(OtherAI);
3940  }
3941 
3942  // Mark the original store as dead.
3943  DeadInsts.insert(SI);
3944  }
3945 
3946  // Save the split loads if there are deferred stores among the users.
3947  if (DeferredStores)
3948  SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
3949 
3950  // Mark the original load as dead and kill the original slice.
3951  DeadInsts.insert(LI);
3952  Offsets.S->kill();
3953  }
3954 
3955  // Second, we rewrite all of the split stores. At this point, we know that
3956  // all loads from this alloca have been split already. For stores of such
3957  // loads, we can simply look up the pre-existing split loads. For stores of
3958  // other loads, we split those loads first and then write split stores of
3959  // them.
3960  for (StoreInst *SI : Stores) {
3961  auto *LI = cast<LoadInst>(SI->getValueOperand());
3962  IntegerType *Ty = cast<IntegerType>(LI->getType());
3963  uint64_t StoreSize = Ty->getBitWidth() / 8;
3964  assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
3965 
3966  auto &Offsets = SplitOffsetsMap[SI];
3967  assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
3968  "Slice size should always match load size exactly!");
3969  uint64_t BaseOffset = Offsets.S->beginOffset();
3970  assert(BaseOffset + StoreSize > BaseOffset &&
3971  "Cannot represent alloca access size using 64-bit integers!");
3972 
3973  Value *LoadBasePtr = LI->getPointerOperand();
3974  Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
3975 
3976  LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
3977 
3978  // Check whether we have an already split load.
3979  auto SplitLoadsMapI = SplitLoadsMap.find(LI);
3980  std::vector<LoadInst *> *SplitLoads = nullptr;
3981  if (SplitLoadsMapI != SplitLoadsMap.end()) {
3982  SplitLoads = &SplitLoadsMapI->second;
3983  assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
3984  "Too few split loads for the number of splits in the store!");
3985  } else {
3986  LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n");
3987  }
3988 
3989  uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
3990  int Idx = 0, Size = Offsets.Splits.size();
3991  for (;;) {
3992  auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
3993  auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
3994  auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
3995 
3996  // Either lookup a split load or create one.
3997  LoadInst *PLoad;
3998  if (SplitLoads) {
3999  PLoad = (*SplitLoads)[Idx];
4000  } else {
4001  IRB.SetInsertPoint(LI);
4002  auto AS = LI->getPointerAddressSpace();
4003  PLoad = IRB.CreateAlignedLoad(
4004  PartTy,
4005  getAdjustedPtr(IRB, DL, LoadBasePtr,
4006  APInt(DL.getIndexSizeInBits(AS), PartOffset),
4007  LoadPartPtrTy, LoadBasePtr->getName() + "."),
4008  getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
4009  LI->getName());
4010  }
4011 
4012  // And store this partition.
4013  IRB.SetInsertPoint(SI);
4014  auto AS = SI->getPointerAddressSpace();
4015  StoreInst *PStore = IRB.CreateAlignedStore(
4016  PLoad,
4017  getAdjustedPtr(IRB, DL, StoreBasePtr,
4018  APInt(DL.getIndexSizeInBits(AS), PartOffset),
4019  StorePartPtrTy, StoreBasePtr->getName() + "."),
4020  getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
4021 
4022  // Now build a new slice for the alloca.
4023  NewSlices.push_back(
4024  Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4025  &PStore->getOperandUse(PStore->getPointerOperandIndex()),
4026  /*IsSplittable*/ false));
4027  LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4028  << ", " << NewSlices.back().endOffset()
4029  << "): " << *PStore << "\n");
4030  if (!SplitLoads) {
4031  LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
4032  }
4033 
4034  // See if we've finished all the splits.
4035  if (Idx >= Size)
4036  break;
4037 
4038  // Setup the next partition.
4039  PartOffset = Offsets.Splits[Idx];
4040  ++Idx;
4041  PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
4042  }
4043 
4044  // We want to immediately iterate on any allocas impacted by splitting
4045  // this load, which is only relevant if it isn't a load of this alloca and
4046  // thus we didn't already split the loads above. We also have to keep track
4047  // of any promotable allocas we split loads on as they can no longer be
4048  // promoted.
4049  if (!SplitLoads) {
4050  if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
4051  assert(OtherAI != &AI && "We can't re-split our own alloca!");
4052  ResplitPromotableAllocas.insert(OtherAI);
4053  Worklist.insert(OtherAI);
4054  } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4055  LoadBasePtr->stripInBoundsOffsets())) {
4056  assert(OtherAI != &AI && "We can't re-split our own alloca!");
4057  Worklist.insert(OtherAI);
4058  }
4059  }
4060 
4061  // Mark the original store as dead now that we've split it up and kill its
4062  // slice. Note that we leave the original load in place unless this store
4063  // was its only use. It may in turn be split up if it is an alloca load
4064  // for some other alloca, but it may be a normal load. This may introduce
4065  // redundant loads, but where those can be merged the rest of the optimizer
4066  // should handle the merging, and this uncovers SSA splits which is more
4067  // important. In practice, the original loads will almost always be fully
4068  // split and removed eventually, and the splits will be merged by any
4069  // trivial CSE, including instcombine.
4070  if (LI->hasOneUse()) {
4071  assert(*LI->user_begin() == SI && "Single use isn't this store!");
4072  DeadInsts.insert(LI);
4073  }
4074  DeadInsts.insert(SI);
4075  Offsets.S->kill();
4076  }
4077 
4078  // Remove the killed slices that have been pre-split.
4079  AS.erase(llvm::remove_if(AS, [](const Slice &S) { return S.isDead(); }),
4080  AS.end());
4081 
4082  // Insert our new slices. This will sort and merge them into the sorted
4083  // sequence.
4084  AS.insert(NewSlices);
4085 
4086  LLVM_DEBUG(dbgs() << " Pre-split slices:\n");
4087 #ifndef NDEBUG
4088  for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
4089  LLVM_DEBUG(AS.print(dbgs(), I, " "));
4090 #endif
4091 
4092  // Finally, don't try to promote any allocas that now require re-splitting.
4093  // They have already been added to the worklist above.
4094  PromotableAllocas.erase(
4095  llvm::remove_if(
4096  PromotableAllocas,
4097  [&](AllocaInst *AI) { return ResplitPromotableAllocas.count(AI); }),
4098  PromotableAllocas.end());
4099 
4100  return true;
4101 }
4102 
4103 /// Rewrite an alloca partition's users.
4104 ///
4105 /// This routine drives both of the rewriting goals of the SROA pass. It tries
4106 /// to rewrite uses of an alloca partition to be conducive for SSA value
4107 /// promotion. If the partition needs a new, more refined alloca, this will
4108 /// build that new alloca, preserving as much type information as possible, and
4109 /// rewrite the uses of the old alloca to point at the new one and have the
4110 /// appropriate new offsets. It also evaluates how successful the rewrite was
4111 /// at enabling promotion and if it was successful queues the alloca to be
4112 /// promoted.
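/// For illustration, given a 12-byte alloca whose bytes [0, 4) are only ever
/// used as a float, this builds a new "alloca float" (named
/// "<old name>.sroa.<n>" below) and rewrites that partition's loads and
/// stores to target it directly.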
4113 AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
4114  Partition &P) {
4115  // Try to compute a friendly type for this partition of the alloca. This
4116  // won't always succeed, in which case we fall back to a legal integer type
4117  // or an i8 array of an appropriate size.
4118  Type *SliceTy = nullptr;
4119  const DataLayout &DL = AI.getModule()->getDataLayout();
4120  if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
4121  if (DL.getTypeAllocSize(CommonUseTy) >= P.size())
4122  SliceTy = CommonUseTy;
4123  if (!SliceTy)
4124  if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
4125  P.beginOffset(), P.size()))
4126  SliceTy = TypePartitionTy;
4127  if ((!SliceTy || (SliceTy->isArrayTy() &&
4128  SliceTy->getArrayElementType()->isIntegerTy())) &&
4129  DL.isLegalInteger(P.size() * 8))
4130  SliceTy = Type::getIntNTy(*C, P.size() * 8);
4131  if (!SliceTy)
4132  SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
4133  assert(DL.getTypeAllocSize(SliceTy) >= P.size());
4134 
4135  bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
4136 
4137  VectorType *VecTy =
4138  IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
4139  if (VecTy)
4140  SliceTy = VecTy;
4141 
4142  // Check for the case where we're going to rewrite to a new alloca of the
4143  // exact same type as the original, and with the same access offsets. In that
4144  // case, re-use the existing alloca, but still run through the rewriter to
4145  // perform phi and select speculation.
4146  // P.beginOffset() can be non-zero even with the same type in a case with
4147  // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
4148  AllocaInst *NewAI;
4149  if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
4150  NewAI = &AI;
4151  // FIXME: We should be able to bail at this point with "nothing changed".
4152  // FIXME: We might want to defer PHI speculation until after here.
4153  // FIXME: return nullptr;
4154  } else {
4155  unsigned Alignment = AI.getAlignment();
4156  if (!Alignment) {
4157  // The minimum alignment which users can rely on when the explicit
4158  // alignment is omitted or zero is that required by the ABI for this
4159  // type.
4160  Alignment = DL.getABITypeAlignment(AI.getAllocatedType());
4161  }
4162  Alignment = MinAlign(Alignment, P.beginOffset());
4163  // If we will get at least this much alignment from the type alone, leave
4164  // the alloca's alignment unconstrained.
4165  if (Alignment <= DL.getABITypeAlignment(SliceTy))
4166  Alignment = 0;
4167  NewAI = new AllocaInst(
4168  SliceTy, AI.getType()->getAddressSpace(), nullptr, Alignment,
4169  AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
4170  // Copy the old AI debug location over to the new one.
4171  NewAI->setDebugLoc(AI.getDebugLoc());
4172  ++NumNewAllocas;
4173  }
4174 
4175  LLVM_DEBUG(dbgs() << "Rewriting alloca partition "
4176  << "[" << P.beginOffset() << "," << P.endOffset()
4177  << ") to: " << *NewAI << "\n");
4178 
4179  // Track the high watermark on the worklist as it is only relevant for
4180  // promoted allocas. We will reset it to this point if the alloca is not in
4181  // fact scheduled for promotion.
4182  unsigned PPWOldSize = PostPromotionWorklist.size();
4183  unsigned NumUses = 0;
4184  SmallSetVector<PHINode *, 8> PHIUsers;
4185  SmallSetVector<SelectInst *, 8> SelectUsers;
4186 
4187  AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
4188  P.endOffset(), IsIntegerPromotable, VecTy,
4189  PHIUsers, SelectUsers);
4190  bool Promotable = true;
4191  for (Slice *S : P.splitSliceTails()) {
4192  Promotable &= Rewriter.visit(S);
4193  ++NumUses;
4194  }
4195  for (Slice &S : P) {
4196  Promotable &= Rewriter.visit(&S);
4197  ++NumUses;
4198  }
4199 
4200  NumAllocaPartitionUses += NumUses;
4201  MaxUsesPerAllocaPartition.updateMax(NumUses);
4202 
4203  // Now that we've processed all the slices in the new partition, check if any
4204  // PHIs or Selects would block promotion.
4205  for (PHINode *PHI : PHIUsers)
4206  if (!isSafePHIToSpeculate(*PHI)) {
4207  Promotable = false;
4208  PHIUsers.clear();
4209  SelectUsers.clear();
4210  break;
4211  }
4212 
4213  for (SelectInst *Sel : SelectUsers)
4214  if (!isSafeSelectToSpeculate(*Sel)) {
4215  Promotable = false;
4216  PHIUsers.clear();
4217  SelectUsers.clear();
4218  break;
4219  }
4220 
4221  if (Promotable) {
4222  if (PHIUsers.empty() && SelectUsers.empty()) {
4223  // Promote the alloca.
4224  PromotableAllocas.push_back(NewAI);
4225  } else {
4226  // If we have either PHIs or Selects to speculate, add them to those
4227  // worklists and re-queue the new alloca so that we promote it on the
4228  // next iteration.
4229  for (PHINode *PHIUser : PHIUsers)
4230  SpeculatablePHIs.insert(PHIUser);
4231  for (SelectInst *SelectUser : SelectUsers)
4232  SpeculatableSelects.insert(SelectUser);
4233  Worklist.insert(NewAI);
4234  }
4235  } else {
4236  // Drop any post-promotion work items if promotion didn't happen.
4237  while (PostPromotionWorklist.size() > PPWOldSize)
4238  PostPromotionWorklist.pop_back();
4239 
4240  // We couldn't promote and we didn't create a new partition, nothing
4241  // happened.
4242  if (NewAI == &AI)
4243  return nullptr;
4244 
4245  // If we can't promote the alloca, iterate on it to check for new
4246  // refinements exposed by splitting the current alloca. Don't iterate on an
4247  // alloca which didn't actually change and didn't get promoted.
4248  Worklist.insert(NewAI);
4249  }
4250 
4251  return NewAI;
4252 }
4253 
4254 /// Walks the slices of an alloca and forms partitions based on them,
4255 /// rewriting each of their uses.
4256 bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
4257  if (AS.begin() == AS.end())
4258  return false;
4259 
4260  unsigned NumPartitions = 0;
4261  bool Changed = false;
4262  const DataLayout &DL = AI.getModule()->getDataLayout();
4263 
4264  // First try to pre-split loads and stores.
4265  Changed |= presplitLoadsAndStores(AI, AS);
4266 
4267  // Now that we have identified any pre-splitting opportunities,
4268  // mark loads and stores unsplittable except for the following case.
4269  // We leave a slice splittable if all other slices are disjoint or fully
4270  // included in the slice, such as whole-alloca loads and stores.
4271  // If we fail to split these during pre-splitting, we want to force them
4272  // to be rewritten into a partition.
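  // For illustration, with an 8-byte alloca a whole-alloca i64 load or store
  // keeps its slice splittable, but a 4-byte access at offset 2 clears the
  // interior byte boundaries 3..5, so any splittable load or store slice that
  // begins or ends at one of those offsets is made unsplittable below.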
4273  bool IsSorted = true;
4274 
4275  uint64_t AllocaSize = DL.getTypeAllocSize(AI.getAllocatedType());
4276  const uint64_t MaxBitVectorSize = 1024;
4277  if (AllocaSize <= MaxBitVectorSize) {
4278  // If a byte boundary is included in any load or store, a slice starting or
4279  // ending at the boundary is not splittable.
4280  SmallBitVector SplittableOffset(AllocaSize + 1, true);
4281  for (Slice &S : AS)
4282  for (unsigned O = S.beginOffset() + 1;
4283  O < S.endOffset() && O < AllocaSize; O++)
4284  SplittableOffset.reset(O);
4285 
4286  for (Slice &S : AS) {
4287  if (!S.isSplittable())
4288  continue;
4289 
4290  if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
4291  (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
4292  continue;
4293 
4294  if (isa<LoadInst>(S.getUse()->getUser()) ||
4295  isa<StoreInst>(S.getUse()->getUser())) {
4296  S.makeUnsplittable();
4297  IsSorted = false;
4298  }
4299  }
4300  }
4301  else {
4302  // We only allow whole-alloca splittable loads and stores
4303  // for a large alloca to avoid creating an overly large BitVector.
4304  for (Slice &S : AS) {
4305  if (!S.isSplittable())
4306  continue;
4307 
4308  if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
4309  continue;
4310 
4311  if (isa<LoadInst>(S.getUse()->getUser()) ||
4312  isa<StoreInst>(S.getUse()->getUser())) {
4313  S.makeUnsplittable();
4314  IsSorted = false;
4315  }
4316  }
4317  }
4318 
4319  if (!IsSorted)
4320  llvm::sort(AS);
4321 
4322  /// Describes the allocas introduced by rewritePartition in order to migrate
4323  /// the debug info.
4324  struct Fragment {
4325  AllocaInst *Alloca;
4326  uint64_t Offset;
4327  uint64_t Size;
4328  Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
4329  : Alloca(AI), Offset(O), Size(S) {}
4330  };
4331  SmallVector<Fragment, 4> Fragments;
4332 
4333  // Rewrite each partition.
4334  for (auto &P : AS.partitions()) {
4335  if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
4336  Changed = true;
4337  if (NewAI != &AI) {
4338  uint64_t SizeOfByte = 8;
4339  uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType());
4340  // Don't include any padding.
4341  uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
4342  Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
4343  }
4344  }
4345  ++NumPartitions;
4346  }
4347 
4348  NumAllocaPartitions += NumPartitions;
4349  MaxPartitionsPerAlloca.updateMax(NumPartitions);
4350 
4351  // Migrate debug information from the old alloca to the new alloca(s)
4352  // and the individual partitions.
4353  TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
4354  if (!DbgDeclares.empty()) {
4355  auto *Var = DbgDeclares.front()->getVariable();
4356  auto *Expr = DbgDeclares.front()->getExpression();
4357  auto VarSize = Var->getSizeInBits();
4358  DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
4359  uint64_t AllocaSize = DL.getTypeSizeInBits(AI.getAllocatedType());
4360  for (auto Fragment : Fragments) {
4361  // Create a fragment expression describing the new partition or reuse AI's
4362  // expression if there is only one partition.
4363  auto *FragmentExpr = Expr;
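 // FragmentExpr starts out as the variable's existing expression; below it
 // may be replaced by a DW_OP_LLVM_fragment expression describing just the
 // bits this new alloca covers.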
4364  if (Fragment.Size < AllocaSize || Expr->isFragment()) {
4365  // If this alloca is already a scalar replacement of a larger aggregate,
4366  // Fragment.Offset describes the offset inside the scalar.
4367  auto ExprFragment = Expr->getFragmentInfo();
4368  uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0;
4369  uint64_t Start = Offset + Fragment.Offset;
4370  uint64_t Size = Fragment.Size;
4371  if (ExprFragment) {
4372  uint64_t AbsEnd =
4373  ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
4374  if (Start >= AbsEnd)
4375  // No need to describe a SROAed padding.
4376  continue;
4377  Size = std::min(Size, AbsEnd - Start);
4378  }
4379  // The new, smaller fragment is stenciled out from the old fragment.
4380  if (auto OrigFragment = FragmentExpr->getFragmentInfo()) {
4381  assert(Start >= OrigFragment->OffsetInBits &&
4382  "new fragment is outside of original fragment");
4383  Start -= OrigFragment->OffsetInBits;
4384  }
4385 
4386  // The alloca may be larger than the variable.
4387  if (VarSize) {
4388  if (Size > *VarSize)
4389  Size = *VarSize;
4390  if (Size == 0 || Start + Size > *VarSize)
4391  continue;
4392  }
4393 
4394  // Avoid creating a fragment expression that covers the entire variable.
4395  if (!VarSize || *VarSize != Size) {
4396  if (auto E =
4397  DIExpression::createFragmentExpression(Expr, Start, Size))
4398  FragmentExpr = *E;
4399  else
4400  continue;
4401  }
4402  }
4403 
4404  // Remove any existing intrinsics describing the same alloca.
4405  for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca))
4406  OldDII->eraseFromParent();
4407 
4408  DIB.insertDeclare(Fragment.Alloca, Var, FragmentExpr,
4409  DbgDeclares.front()->getDebugLoc(), &AI);
4410  }
4411  }
4412  return Changed;
4413 }
4414 
4415 /// Clobber a use with undef, deleting the used value if it becomes dead.
4416 void SROA::clobberUse(Use &U) {
4417  Value *OldV = U;
4418  // Replace the use with an undef value.
4419  U = UndefValue::get(OldV->getType());
4420 
4421  // Check for this making an instruction dead. We have to garbage collect
4422  // all the dead instructions to ensure the uses of any alloca end up being
4423  // minimal.
4424  if (Instruction *OldI = dyn_cast<Instruction>(OldV))
4425  if (isInstructionTriviallyDead(OldI)) {
4426  DeadInsts.insert(OldI);
4427  }
4428 }
4429 
4430 /// Analyze an alloca for SROA.
4431 ///
4432 /// This analyzes the alloca to ensure we can reason about it, builds
4433 /// the slices of the alloca, and then hands it off to be split and
4434 /// rewritten as needed.
4435 bool SROA::runOnAlloca(AllocaInst &AI) {
4436  LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
4437  ++NumAllocasAnalyzed;
4438 
4439  // Special case dead allocas, as they're trivial.
4440  if (AI.use_empty()) {
4441  AI.eraseFromParent();
4442  return true;
4443  }
4444  const DataLayout &DL = AI.getModule()->getDataLayout();
4445 
4446  // Skip alloca forms that this analysis can't handle.
4447  if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
4448  DL.getTypeAllocSize(AI.getAllocatedType()) == 0)
4449  return false;
4450 
4451  bool Changed = false;
4452 
4453  // First, split any FCA loads and stores touching this alloca to promote
4454  // better splitting and promotion opportunities.
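 // (FCA stands for "first-class aggregate": a struct or array value that is
 // loaded or stored as a single SSA value.)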
4455  AggLoadStoreRewriter AggRewriter(DL);
4456  Changed |= AggRewriter.rewrite(AI);
4457 
4458  // Build the slices using a recursive instruction-visiting builder.
4459  AllocaSlices AS(DL, AI);
4460  LLVM_DEBUG(AS.print(dbgs()));
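 // If a pointer to the alloca escapes our analysis, rewriting its uses would
 // be unsound; in that case report only what the aggregate rewriter already
 // changed.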
4461  if (AS.isEscaped())
4462  return Changed;
4463 
4464  // Delete all the dead users of this alloca before splitting and rewriting it.
4465  for (Instruction *DeadUser : AS.getDeadUsers()) {
4466  // Free up everything used by this instruction.
4467  for (Use &DeadOp : DeadUser->operands())
4468  clobberUse(DeadOp);
4469 
4470  // Now replace the uses of this instruction.
4471  DeadUser->replaceAllUsesWith(UndefValue::get(DeadUser->getType()));
4472 
4473  // And mark it for deletion.
4474  DeadInsts.insert(DeadUser);
4475  Changed = true;
4476  }
4477  for (Use *DeadOp : AS.getDeadOperands()) {
4478  clobberUse(*DeadOp);
4479  Changed = true;
4480  }
4481 
4482  // No slices to split. Leave the dead alloca for a later pass to clean up.
4483  if (AS.begin() == AS.end())
4484  return Changed;
4485 
4486  Changed |= splitAlloca(AI, AS);
4487 
4488  LLVM_DEBUG(dbgs() << " Speculating PHIs\n");
4489  while (!SpeculatablePHIs.empty())
4490  speculatePHINodeLoads(*SpeculatablePHIs.pop_back_val());
4491 
4492  LLVM_DEBUG(dbgs() << " Speculating Selects\n");
4493  while (!SpeculatableSelects.empty())
4494  speculateSelectInstLoads(*SpeculatableSelects.pop_back_val());
4495 
4496  return Changed;
4497 }
4498 
4499 /// Delete the dead instructions accumulated in this run.
4500 ///
4501 /// Recursively deletes the dead instructions we've accumulated. This is done
4502 /// at the very end to maximize locality of the recursive delete and to
4503 /// minimize the problems of invalidated instruction pointers as such pointers
4504 /// are used heavily in the intermediate stages of the algorithm.
4505 ///
4506 /// We also record the alloca instructions deleted here so that they aren't
4507 /// subsequently handed to mem2reg to promote.
4508 bool SROA::deleteDeadInstructions(
4509  SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
4510  bool Changed = false;
4511  while (!DeadInsts.empty()) {
4512  Instruction *I = DeadInsts.pop_back_val();
4513  LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
4514 
4515  // If the instruction is an alloca, find the possible dbg.declare connected
4516  // to it, and remove it too. We must do this before calling RAUW or we will
4517  // not be able to find it.
4518  if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
4519  DeletedAllocas.insert(AI);
4520  for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(AI))
4521  OldDII->eraseFromParent();
4522  }
4523 
4524  I->replaceAllUsesWith(UndefValue::get(I->getType()));
4525 
4526  for (Use &Operand : I->operands())
4527  if (Instruction *U = dyn_cast<Instruction>(Operand)) {
4528  // Zero out the operand and see if it becomes trivially dead.
4529  Operand = nullptr;
4530  if (isInstructionTriviallyDead(U))
4531  DeadInsts.insert(U);
4532  }
4533 
4534  ++NumDeleted;
4535  I->eraseFromParent();
4536  Changed = true;
4537  }
4538  return Changed;
4539 }
4540 
4541 /// Promote the allocas, using the best available technique.
4542 ///
4543 /// This attempts to promote whatever allocas have been identified as viable in
4544 /// the PromotableAllocas list. If that list is empty, there is nothing to do.
4545 /// This function returns whether any promotion occurred.
4546 bool SROA::promoteAllocas(Function &F) {
4547  if (PromotableAllocas.empty())
4548  return false;
4549 
4550  NumPromoted += PromotableAllocas.size();
4551 
4552  LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
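 // PromoteMemToReg rewrites each promotable alloca into SSA form, using the
 // dominator tree and assumption cache and inserting PHI nodes as needed.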
4553  PromoteMemToReg(PromotableAllocas, *DT, AC);
4554  PromotableAllocas.clear();
4555  return true;
4556 }
4557 
4558 PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
4559  AssumptionCache &RunAC) {
4560  LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
4561  C = &F.getContext();
4562  DT = &RunDT;
4563  AC = &RunAC;
4564 
4565  BasicBlock &EntryBB = F.getEntryBlock();
4566  for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
4567  I != E; ++I) {
4568  if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
4569  Worklist.insert(AI);
4570  }
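 // Only allocas in the entry block are queued for analysis; allocas created
 // elsewhere in the function are not considered by this pass.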
4571 
4572  bool Changed = false;
4573  // A set of deleted alloca instruction pointers which should be removed from
4574  // the list of promotable allocas.
4575  SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
4576 
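 // Process the worklist to a fixed point: each round rewrites and promotes
 // what it can, then continues with any allocas that were queued for a
 // second look after promotion.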
4577  do {
4578  while (!Worklist.empty()) {
4579  Changed |= runOnAlloca(*Worklist.pop_back_val());
4580  Changed |= deleteDeadInstructions(DeletedAllocas);
4581 
4582  // Remove the deleted allocas from various lists so that we don't try to
4583  // continue processing them.
4584  if (!DeletedAllocas.empty()) {
4585  auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
4586  Worklist.remove_if(IsInSet);
4587  PostPromotionWorklist.remove_if(IsInSet);
4588  PromotableAllocas.erase(llvm::remove_if(PromotableAllocas, IsInSet),
4589  PromotableAllocas.end());
4590  DeletedAllocas.clear();
4591  }
4592  }
4593 
4594  Changed |= promoteAllocas(F);
4595 
4596  Worklist = PostPromotionWorklist;
4597  PostPromotionWorklist.clear();
4598  } while (!Worklist.empty());
4599 
4600  if (!Changed)
4601  return PreservedAnalyses::all();
4602 
4603  PreservedAnalyses PA;
4604  PA.preserveSet<CFGAnalyses>();
4605  PA.preserve<GlobalsAA>();
4606  return PA;
4607 }
4608 
4609 PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) {
4610  return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F),
4611  AM.getResult<AssumptionAnalysis>(F));
4612 }
4613 
4614 /// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
4615 ///
4616 /// This is in the llvm namespace purely to allow it to be a friend of the \c
4617 /// SROA pass.
4618 class llvm::sroa::SROALegacyPass : public FunctionPass {
4619  /// The SROA implementation.
4620  SROA Impl;
4621 
4622 public:
4623  static char ID;
4624 
4625  SROALegacyPass() : FunctionPass(ID) {
4626  initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
4627  }
4628 
4629  bool runOnFunction(Function &F) override {
4630  if (skipFunction(F))
4631  return false;
4632 
4633  auto PA = Impl.runImpl(
4634  F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
4635  getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
4636  return !PA.areAllPreserved();
4637  }
4638 
4639  void getAnalysisUsage(AnalysisUsage &AU) const override {
4640  AU.addRequired<AssumptionCacheTracker>();
4641  AU.addRequired<DominatorTreeWrapperPass>();
4642  AU.addPreserved<GlobalsAAWrapperPass>();
4643  AU.setPreservesCFG();
4644  }
4645 
4646  StringRef getPassName() const override { return "SROA"; }
4647 };
4648 
4649 char SROALegacyPass::ID = 0;
4650 
4651 FunctionPass *llvm::createSROAPass() { return new SROALegacyPass(); }
4652 
4653 INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
4654  "Scalar Replacement Of Aggregates", false, false)
4655 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
4656 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
4657 INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
4658  false, false)