Static analyzer report
File: lib/Transforms/Scalar/SROA.cpp
Location: line 2635, column 5
Description: Use of memory after it is freed
1 | //===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===// | |||||
2 | // | |||||
3 | // The LLVM Compiler Infrastructure | |||||
4 | // | |||||
5 | // This file is distributed under the University of Illinois Open Source | |||||
6 | // License. See LICENSE.TXT for details. | |||||
7 | // | |||||
8 | //===----------------------------------------------------------------------===// | |||||
9 | /// \file | |||||
10 | /// This transformation implements the well known scalar replacement of | |||||
11 | /// aggregates transformation. It tries to identify promotable elements of an | |||||
12 | /// aggregate alloca, and promote them to registers. It will also try to | |||||
13 | /// convert uses of an element (or set of elements) of an alloca into a vector | |||||
14 | /// or bitfield-style integer scalar if appropriate. | |||||
15 | /// | |||||
16 | /// It works to do this with minimal slicing of the alloca so that regions | |||||
17 | /// which are merely transferred in and out of external memory remain unchanged | |||||
18 | /// and are not decomposed to scalar code. | |||||
19 | /// | |||||
20 | /// Because this also performs alloca promotion, it can be thought of as also | |||||
21 | /// serving the purpose of SSA formation. The algorithm iterates on the | |||||
22 | /// function until all opportunities for promotion have been realized. | |||||
23 | /// | |||||
24 | //===----------------------------------------------------------------------===// | |||||
25 | ||||||
26 | #include "llvm/Transforms/Scalar.h" | |||||
27 | #include "llvm/ADT/STLExtras.h" | |||||
28 | #include "llvm/ADT/SetVector.h" | |||||
29 | #include "llvm/ADT/SmallVector.h" | |||||
30 | #include "llvm/ADT/Statistic.h" | |||||
31 | #include "llvm/Analysis/AssumptionCache.h" | |||||
32 | #include "llvm/Analysis/Loads.h" | |||||
33 | #include "llvm/Analysis/PtrUseVisitor.h" | |||||
34 | #include "llvm/Analysis/ValueTracking.h" | |||||
35 | #include "llvm/IR/Constants.h" | |||||
36 | #include "llvm/IR/DIBuilder.h" | |||||
37 | #include "llvm/IR/DataLayout.h" | |||||
38 | #include "llvm/IR/DebugInfo.h" | |||||
39 | #include "llvm/IR/DerivedTypes.h" | |||||
40 | #include "llvm/IR/Dominators.h" | |||||
41 | #include "llvm/IR/Function.h" | |||||
42 | #include "llvm/IR/IRBuilder.h" | |||||
43 | #include "llvm/IR/InstVisitor.h" | |||||
44 | #include "llvm/IR/Instructions.h" | |||||
45 | #include "llvm/IR/IntrinsicInst.h" | |||||
46 | #include "llvm/IR/LLVMContext.h" | |||||
47 | #include "llvm/IR/Operator.h" | |||||
48 | #include "llvm/Pass.h" | |||||
49 | #include "llvm/Support/CommandLine.h" | |||||
50 | #include "llvm/Support/Compiler.h" | |||||
51 | #include "llvm/Support/Debug.h" | |||||
52 | #include "llvm/Support/ErrorHandling.h" | |||||
53 | #include "llvm/Support/MathExtras.h" | |||||
54 | #include "llvm/Support/TimeValue.h" | |||||
55 | #include "llvm/Support/raw_ostream.h" | |||||
56 | #include "llvm/Transforms/Utils/Local.h" | |||||
57 | #include "llvm/Transforms/Utils/PromoteMemToReg.h" | |||||
58 | #include "llvm/Transforms/Utils/SSAUpdater.h" | |||||
59 | ||||||
// <random> is only needed for a debug-build shuffle check (C++11 and later).
// The guard tests the standard __cplusplus version macro; the analyzer dump
// had fused the macro name with its expanded value.
#if __cplusplus >= 201103L && !defined(NDEBUG)
// We only use this for a debug check in C++11
#include <random>
#endif
64 | ||||||
65 | using namespace llvm; | |||||
66 | ||||||
67 | #define DEBUG_TYPE"sroa" "sroa" | |||||
68 | ||||||
69 | STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement")static llvm::Statistic NumAllocasAnalyzed = { "sroa", "Number of allocas analyzed for replacement" , 0, 0 }; | |||||
70 | STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed")static llvm::Statistic NumAllocaPartitions = { "sroa", "Number of alloca partitions formed" , 0, 0 }; | |||||
71 | STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca")static llvm::Statistic MaxPartitionsPerAlloca = { "sroa", "Maximum number of partitions per alloca" , 0, 0 }; | |||||
72 | STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten")static llvm::Statistic NumAllocaPartitionUses = { "sroa", "Number of alloca partition uses rewritten" , 0, 0 }; | |||||
73 | STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition")static llvm::Statistic MaxUsesPerAllocaPartition = { "sroa", "Maximum number of uses of a partition" , 0, 0 }; | |||||
74 | STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced")static llvm::Statistic NumNewAllocas = { "sroa", "Number of new, smaller allocas introduced" , 0, 0 }; | |||||
75 | STATISTIC(NumPromoted, "Number of allocas promoted to SSA values")static llvm::Statistic NumPromoted = { "sroa", "Number of allocas promoted to SSA values" , 0, 0 }; | |||||
76 | STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion")static llvm::Statistic NumLoadsSpeculated = { "sroa", "Number of loads speculated to allow promotion" , 0, 0 }; | |||||
77 | STATISTIC(NumDeleted, "Number of instructions deleted")static llvm::Statistic NumDeleted = { "sroa", "Number of instructions deleted" , 0, 0 }; | |||||
78 | STATISTIC(NumVectorized, "Number of vectorized aggregates")static llvm::Statistic NumVectorized = { "sroa", "Number of vectorized aggregates" , 0, 0 }; | |||||
79 | ||||||
80 | /// Hidden option to force the pass to not use DomTree and mem2reg, instead | |||||
81 | /// forming SSA values through the SSAUpdater infrastructure. | |||||
82 | static cl::opt<bool> ForceSSAUpdater("force-ssa-updater", cl::init(false), | |||||
83 | cl::Hidden); | |||||
84 | ||||||
85 | /// Hidden option to enable randomly shuffling the slices to help uncover | |||||
86 | /// instability in their order. | |||||
87 | static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices", | |||||
88 | cl::init(false), cl::Hidden); | |||||
89 | ||||||
90 | /// Hidden option to experiment with completely strict handling of inbounds | |||||
91 | /// GEPs. | |||||
92 | static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false), | |||||
93 | cl::Hidden); | |||||
94 | ||||||
95 | namespace { | |||||
96 | /// \brief A custom IRBuilder inserter which prefixes all names if they are | |||||
97 | /// preserved. | |||||
98 | template <bool preserveNames = true> | |||||
99 | class IRBuilderPrefixedInserter | |||||
100 | : public IRBuilderDefaultInserter<preserveNames> { | |||||
101 | std::string Prefix; | |||||
102 | ||||||
103 | public: | |||||
104 | void SetNamePrefix(const Twine &P) { Prefix = P.str(); } | |||||
105 | ||||||
106 | protected: | |||||
107 | void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, | |||||
108 | BasicBlock::iterator InsertPt) const { | |||||
109 | IRBuilderDefaultInserter<preserveNames>::InsertHelper( | |||||
110 | I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt); | |||||
111 | } | |||||
112 | }; | |||||
113 | ||||||
114 | // Specialization for not preserving the name is trivial. | |||||
115 | template <> | |||||
116 | class IRBuilderPrefixedInserter<false> | |||||
117 | : public IRBuilderDefaultInserter<false> { | |||||
118 | public: | |||||
119 | void SetNamePrefix(const Twine &P) {} | |||||
120 | }; | |||||
121 | ||||||
122 | /// \brief Provide a typedef for IRBuilder that drops names in release builds. | |||||
123 | #ifndef NDEBUG | |||||
124 | typedef llvm::IRBuilder<true, ConstantFolder, IRBuilderPrefixedInserter<true>> | |||||
125 | IRBuilderTy; | |||||
126 | #else | |||||
127 | typedef llvm::IRBuilder<false, ConstantFolder, IRBuilderPrefixedInserter<false>> | |||||
128 | IRBuilderTy; | |||||
129 | #endif | |||||
130 | } | |||||
131 | ||||||
132 | namespace { | |||||
133 | /// \brief A used slice of an alloca. | |||||
134 | /// | |||||
135 | /// This structure represents a slice of an alloca used by some instruction. It | |||||
136 | /// stores both the begin and end offsets of this use, a pointer to the use | |||||
137 | /// itself, and a flag indicating whether we can classify the use as splittable | |||||
138 | /// or not when forming partitions of the alloca. | |||||
139 | class Slice { | |||||
140 | /// \brief The beginning offset of the range. | |||||
141 | uint64_t BeginOffset; | |||||
142 | ||||||
143 | /// \brief The ending offset, not included in the range. | |||||
144 | uint64_t EndOffset; | |||||
145 | ||||||
146 | /// \brief Storage for both the use of this slice and whether it can be | |||||
147 | /// split. | |||||
148 | PointerIntPair<Use *, 1, bool> UseAndIsSplittable; | |||||
149 | ||||||
150 | public: | |||||
151 | Slice() : BeginOffset(), EndOffset() {} | |||||
152 | Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable) | |||||
153 | : BeginOffset(BeginOffset), EndOffset(EndOffset), | |||||
154 | UseAndIsSplittable(U, IsSplittable) {} | |||||
155 | ||||||
156 | uint64_t beginOffset() const { return BeginOffset; } | |||||
157 | uint64_t endOffset() const { return EndOffset; } | |||||
158 | ||||||
159 | bool isSplittable() const { return UseAndIsSplittable.getInt(); } | |||||
160 | void makeUnsplittable() { UseAndIsSplittable.setInt(false); } | |||||
161 | ||||||
162 | Use *getUse() const { return UseAndIsSplittable.getPointer(); } | |||||
163 | ||||||
164 | bool isDead() const { return getUse() == nullptr; } | |||||
165 | void kill() { UseAndIsSplittable.setPointer(nullptr); } | |||||
166 | ||||||
167 | /// \brief Support for ordering ranges. | |||||
168 | /// | |||||
169 | /// This provides an ordering over ranges such that start offsets are | |||||
170 | /// always increasing, and within equal start offsets, the end offsets are | |||||
171 | /// decreasing. Thus the spanning range comes first in a cluster with the | |||||
172 | /// same start position. | |||||
173 | bool operator<(const Slice &RHS) const { | |||||
174 | if (beginOffset() < RHS.beginOffset()) | |||||
175 | return true; | |||||
176 | if (beginOffset() > RHS.beginOffset()) | |||||
177 | return false; | |||||
178 | if (isSplittable() != RHS.isSplittable()) | |||||
179 | return !isSplittable(); | |||||
180 | if (endOffset() > RHS.endOffset()) | |||||
181 | return true; | |||||
182 | return false; | |||||
183 | } | |||||
184 | ||||||
185 | /// \brief Support comparison with a single offset to allow binary searches. | |||||
186 | friend LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) bool operator<(const Slice &LHS, | |||||
187 | uint64_t RHSOffset) { | |||||
188 | return LHS.beginOffset() < RHSOffset; | |||||
189 | } | |||||
190 | friend LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) bool operator<(uint64_t LHSOffset, | |||||
191 | const Slice &RHS) { | |||||
192 | return LHSOffset < RHS.beginOffset(); | |||||
193 | } | |||||
194 | ||||||
195 | bool operator==(const Slice &RHS) const { | |||||
196 | return isSplittable() == RHS.isSplittable() && | |||||
197 | beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset(); | |||||
198 | } | |||||
199 | bool operator!=(const Slice &RHS) const { return !operator==(RHS); } | |||||
200 | }; | |||||
201 | } // end anonymous namespace | |||||
202 | ||||||
203 | namespace llvm { | |||||
204 | template <typename T> struct isPodLike; | |||||
205 | template <> struct isPodLike<Slice> { static const bool value = true; }; | |||||
206 | } | |||||
207 | ||||||
208 | namespace { | |||||
209 | /// \brief Representation of the alloca slices. | |||||
210 | /// | |||||
211 | /// This class represents the slices of an alloca which are formed by its | |||||
212 | /// various uses. If a pointer escapes, we can't fully build a representation | |||||
213 | /// for the slices used and we reflect that in this structure. The uses are | |||||
214 | /// stored, sorted by increasing beginning offset and with unsplittable slices | |||||
215 | /// starting at a particular offset before splittable slices. | |||||
216 | class AllocaSlices { | |||||
217 | public: | |||||
218 | /// \brief Construct the slices of a particular alloca. | |||||
219 | AllocaSlices(const DataLayout &DL, AllocaInst &AI); | |||||
220 | ||||||
221 | /// \brief Test whether a pointer to the allocation escapes our analysis. | |||||
222 | /// | |||||
223 | /// If this is true, the slices are never fully built and should be | |||||
224 | /// ignored. | |||||
225 | bool isEscaped() const { return PointerEscapingInstr; } | |||||
226 | ||||||
227 | /// \brief Support for iterating over the slices. | |||||
228 | /// @{ | |||||
229 | typedef SmallVectorImpl<Slice>::iterator iterator; | |||||
230 | typedef iterator_range<iterator> range; | |||||
231 | iterator begin() { return Slices.begin(); } | |||||
232 | iterator end() { return Slices.end(); } | |||||
233 | ||||||
234 | typedef SmallVectorImpl<Slice>::const_iterator const_iterator; | |||||
235 | typedef iterator_range<const_iterator> const_range; | |||||
236 | const_iterator begin() const { return Slices.begin(); } | |||||
237 | const_iterator end() const { return Slices.end(); } | |||||
238 | /// @} | |||||
239 | ||||||
240 | /// \brief Erase a range of slices. | |||||
241 | void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); } | |||||
242 | ||||||
243 | /// \brief Insert new slices for this alloca. | |||||
244 | /// | |||||
245 | /// This moves the slices into the alloca's slices collection, and re-sorts | |||||
246 | /// everything so that the usual ordering properties of the alloca's slices | |||||
247 | /// hold. | |||||
248 | void insert(ArrayRef<Slice> NewSlices) { | |||||
249 | int OldSize = Slices.size(); | |||||
250 | Slices.append(NewSlices.begin(), NewSlices.end()); | |||||
251 | auto SliceI = Slices.begin() + OldSize; | |||||
252 | std::sort(SliceI, Slices.end()); | |||||
253 | std::inplace_merge(Slices.begin(), SliceI, Slices.end()); | |||||
254 | } | |||||
255 | ||||||
256 | // Forward declare an iterator to befriend it. | |||||
257 | class partition_iterator; | |||||
258 | ||||||
259 | /// \brief A partition of the slices. | |||||
260 | /// | |||||
261 | /// An ephemeral representation for a range of slices which can be viewed as | |||||
262 | /// a partition of the alloca. This range represents a span of the alloca's | |||||
263 | /// memory which cannot be split, and provides access to all of the slices | |||||
264 | /// overlapping some part of the partition. | |||||
265 | /// | |||||
266 | /// Objects of this type are produced by traversing the alloca's slices, but | |||||
267 | /// are only ephemeral and not persistent. | |||||
268 | class Partition { | |||||
269 | private: | |||||
270 | friend class AllocaSlices; | |||||
271 | friend class AllocaSlices::partition_iterator; | |||||
272 | ||||||
273 | /// \brief The begining and ending offsets of the alloca for this partition. | |||||
274 | uint64_t BeginOffset, EndOffset; | |||||
275 | ||||||
276 | /// \brief The start end end iterators of this partition. | |||||
277 | iterator SI, SJ; | |||||
278 | ||||||
279 | /// \brief A collection of split slice tails overlapping the partition. | |||||
280 | SmallVector<Slice *, 4> SplitTails; | |||||
281 | ||||||
282 | /// \brief Raw constructor builds an empty partition starting and ending at | |||||
283 | /// the given iterator. | |||||
284 | Partition(iterator SI) : SI(SI), SJ(SI) {} | |||||
285 | ||||||
286 | public: | |||||
287 | /// \brief The start offset of this partition. | |||||
288 | /// | |||||
289 | /// All of the contained slices start at or after this offset. | |||||
290 | uint64_t beginOffset() const { return BeginOffset; } | |||||
291 | ||||||
292 | /// \brief The end offset of this partition. | |||||
293 | /// | |||||
294 | /// All of the contained slices end at or before this offset. | |||||
295 | uint64_t endOffset() const { return EndOffset; } | |||||
296 | ||||||
297 | /// \brief The size of the partition. | |||||
298 | /// | |||||
299 | /// Note that this can never be zero. | |||||
300 | uint64_t size() const { | |||||
301 | assert(BeginOffset < EndOffset && "Partitions must span some bytes!")((BeginOffset < EndOffset && "Partitions must span some bytes!" ) ? static_cast<void> (0) : __assert_fail ("BeginOffset < EndOffset && \"Partitions must span some bytes!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 301, __PRETTY_FUNCTION__)); | |||||
302 | return EndOffset - BeginOffset; | |||||
303 | } | |||||
304 | ||||||
305 | /// \brief Test whether this partition contains no slices, and merely spans | |||||
306 | /// a region occupied by split slices. | |||||
307 | bool empty() const { return SI == SJ; } | |||||
308 | ||||||
309 | /// \name Iterate slices that start within the partition. | |||||
310 | /// These may be splittable or unsplittable. They have a begin offset >= the | |||||
311 | /// partition begin offset. | |||||
312 | /// @{ | |||||
313 | // FIXME: We should probably define a "concat_iterator" helper and use that | |||||
314 | // to stitch together pointee_iterators over the split tails and the | |||||
315 | // contiguous iterators of the partition. That would give a much nicer | |||||
316 | // interface here. We could then additionally expose filtered iterators for | |||||
317 | // split, unsplit, and unsplittable splices based on the usage patterns. | |||||
318 | iterator begin() const { return SI; } | |||||
319 | iterator end() const { return SJ; } | |||||
320 | /// @} | |||||
321 | ||||||
322 | /// \brief Get the sequence of split slice tails. | |||||
323 | /// | |||||
324 | /// These tails are of slices which start before this partition but are | |||||
325 | /// split and overlap into the partition. We accumulate these while forming | |||||
326 | /// partitions. | |||||
327 | ArrayRef<Slice *> splitSliceTails() const { return SplitTails; } | |||||
328 | }; | |||||
329 | ||||||
330 | /// \brief An iterator over partitions of the alloca's slices. | |||||
331 | /// | |||||
332 | /// This iterator implements the core algorithm for partitioning the alloca's | |||||
333 | /// slices. It is a forward iterator as we don't support backtracking for | |||||
334 | /// efficiency reasons, and re-use a single storage area to maintain the | |||||
335 | /// current set of split slices. | |||||
336 | /// | |||||
337 | /// It is templated on the slice iterator type to use so that it can operate | |||||
338 | /// with either const or non-const slice iterators. | |||||
339 | class partition_iterator | |||||
340 | : public iterator_facade_base<partition_iterator, | |||||
341 | std::forward_iterator_tag, Partition> { | |||||
342 | friend class AllocaSlices; | |||||
343 | ||||||
344 | /// \brief Most of the state for walking the partitions is held in a class | |||||
345 | /// with a nice interface for examining them. | |||||
346 | Partition P; | |||||
347 | ||||||
348 | /// \brief We need to keep the end of the slices to know when to stop. | |||||
349 | AllocaSlices::iterator SE; | |||||
350 | ||||||
351 | /// \brief We also need to keep track of the maximum split end offset seen. | |||||
352 | /// FIXME: Do we really? | |||||
353 | uint64_t MaxSplitSliceEndOffset; | |||||
354 | ||||||
355 | /// \brief Sets the partition to be empty at given iterator, and sets the | |||||
356 | /// end iterator. | |||||
357 | partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE) | |||||
358 | : P(SI), SE(SE), MaxSplitSliceEndOffset(0) { | |||||
359 | // If not already at the end, advance our state to form the initial | |||||
360 | // partition. | |||||
361 | if (SI != SE) | |||||
362 | advance(); | |||||
363 | } | |||||
364 | ||||||
365 | /// \brief Advance the iterator to the next partition. | |||||
366 | /// | |||||
367 | /// Requires that the iterator not be at the end of the slices. | |||||
368 | void advance() { | |||||
369 | assert((P.SI != SE || !P.SplitTails.empty()) &&(((P.SI != SE || !P.SplitTails.empty()) && "Cannot advance past the end of the slices!" ) ? static_cast<void> (0) : __assert_fail ("(P.SI != SE || !P.SplitTails.empty()) && \"Cannot advance past the end of the slices!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 370, __PRETTY_FUNCTION__)) | |||||
370 | "Cannot advance past the end of the slices!")(((P.SI != SE || !P.SplitTails.empty()) && "Cannot advance past the end of the slices!" ) ? static_cast<void> (0) : __assert_fail ("(P.SI != SE || !P.SplitTails.empty()) && \"Cannot advance past the end of the slices!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 370, __PRETTY_FUNCTION__)); | |||||
371 | ||||||
372 | // Clear out any split uses which have ended. | |||||
373 | if (!P.SplitTails.empty()) { | |||||
374 | if (P.EndOffset >= MaxSplitSliceEndOffset) { | |||||
375 | // If we've finished all splits, this is easy. | |||||
376 | P.SplitTails.clear(); | |||||
377 | MaxSplitSliceEndOffset = 0; | |||||
378 | } else { | |||||
379 | // Remove the uses which have ended in the prior partition. This | |||||
380 | // cannot change the max split slice end because we just checked that | |||||
381 | // the prior partition ended prior to that max. | |||||
382 | P.SplitTails.erase( | |||||
383 | std::remove_if( | |||||
384 | P.SplitTails.begin(), P.SplitTails.end(), | |||||
385 | [&](Slice *S) { return S->endOffset() <= P.EndOffset; }), | |||||
386 | P.SplitTails.end()); | |||||
387 | assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(),((std::any_of(P.SplitTails.begin(), P.SplitTails.end(), [& ](Slice *S) { return S->endOffset() == MaxSplitSliceEndOffset ; }) && "Could not find the current max split slice offset!" ) ? static_cast<void> (0) : __assert_fail ("std::any_of(P.SplitTails.begin(), P.SplitTails.end(), [&](Slice *S) { return S->endOffset() == MaxSplitSliceEndOffset; }) && \"Could not find the current max split slice offset!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 391, __PRETTY_FUNCTION__)) | |||||
388 | [&](Slice *S) {((std::any_of(P.SplitTails.begin(), P.SplitTails.end(), [& ](Slice *S) { return S->endOffset() == MaxSplitSliceEndOffset ; }) && "Could not find the current max split slice offset!" ) ? static_cast<void> (0) : __assert_fail ("std::any_of(P.SplitTails.begin(), P.SplitTails.end(), [&](Slice *S) { return S->endOffset() == MaxSplitSliceEndOffset; }) && \"Could not find the current max split slice offset!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 391, __PRETTY_FUNCTION__)) | |||||
389 | return S->endOffset() == MaxSplitSliceEndOffset;((std::any_of(P.SplitTails.begin(), P.SplitTails.end(), [& ](Slice *S) { return S->endOffset() == MaxSplitSliceEndOffset ; }) && "Could not find the current max split slice offset!" ) ? static_cast<void> (0) : __assert_fail ("std::any_of(P.SplitTails.begin(), P.SplitTails.end(), [&](Slice *S) { return S->endOffset() == MaxSplitSliceEndOffset; }) && \"Could not find the current max split slice offset!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 391, __PRETTY_FUNCTION__)) | |||||
390 | }) &&((std::any_of(P.SplitTails.begin(), P.SplitTails.end(), [& ](Slice *S) { return S->endOffset() == MaxSplitSliceEndOffset ; }) && "Could not find the current max split slice offset!" ) ? static_cast<void> (0) : __assert_fail ("std::any_of(P.SplitTails.begin(), P.SplitTails.end(), [&](Slice *S) { return S->endOffset() == MaxSplitSliceEndOffset; }) && \"Could not find the current max split slice offset!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 391, __PRETTY_FUNCTION__)) | |||||
391 | "Could not find the current max split slice offset!")((std::any_of(P.SplitTails.begin(), P.SplitTails.end(), [& ](Slice *S) { return S->endOffset() == MaxSplitSliceEndOffset ; }) && "Could not find the current max split slice offset!" ) ? static_cast<void> (0) : __assert_fail ("std::any_of(P.SplitTails.begin(), P.SplitTails.end(), [&](Slice *S) { return S->endOffset() == MaxSplitSliceEndOffset; }) && \"Could not find the current max split slice offset!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 391, __PRETTY_FUNCTION__)); | |||||
392 | assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(),((std::all_of(P.SplitTails.begin(), P.SplitTails.end(), [& ](Slice *S) { return S->endOffset() <= MaxSplitSliceEndOffset ; }) && "Max split slice end offset is not actually the max!" ) ? static_cast<void> (0) : __assert_fail ("std::all_of(P.SplitTails.begin(), P.SplitTails.end(), [&](Slice *S) { return S->endOffset() <= MaxSplitSliceEndOffset; }) && \"Max split slice end offset is not actually the max!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 396, __PRETTY_FUNCTION__)) | |||||
393 | [&](Slice *S) {((std::all_of(P.SplitTails.begin(), P.SplitTails.end(), [& ](Slice *S) { return S->endOffset() <= MaxSplitSliceEndOffset ; }) && "Max split slice end offset is not actually the max!" ) ? static_cast<void> (0) : __assert_fail ("std::all_of(P.SplitTails.begin(), P.SplitTails.end(), [&](Slice *S) { return S->endOffset() <= MaxSplitSliceEndOffset; }) && \"Max split slice end offset is not actually the max!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 396, __PRETTY_FUNCTION__)) | |||||
394 | return S->endOffset() <= MaxSplitSliceEndOffset;((std::all_of(P.SplitTails.begin(), P.SplitTails.end(), [& ](Slice *S) { return S->endOffset() <= MaxSplitSliceEndOffset ; }) && "Max split slice end offset is not actually the max!" ) ? static_cast<void> (0) : __assert_fail ("std::all_of(P.SplitTails.begin(), P.SplitTails.end(), [&](Slice *S) { return S->endOffset() <= MaxSplitSliceEndOffset; }) && \"Max split slice end offset is not actually the max!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 396, __PRETTY_FUNCTION__)) | |||||
395 | }) &&((std::all_of(P.SplitTails.begin(), P.SplitTails.end(), [& ](Slice *S) { return S->endOffset() <= MaxSplitSliceEndOffset ; }) && "Max split slice end offset is not actually the max!" ) ? static_cast<void> (0) : __assert_fail ("std::all_of(P.SplitTails.begin(), P.SplitTails.end(), [&](Slice *S) { return S->endOffset() <= MaxSplitSliceEndOffset; }) && \"Max split slice end offset is not actually the max!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 396, __PRETTY_FUNCTION__)) | |||||
396 | "Max split slice end offset is not actually the max!")((std::all_of(P.SplitTails.begin(), P.SplitTails.end(), [& ](Slice *S) { return S->endOffset() <= MaxSplitSliceEndOffset ; }) && "Max split slice end offset is not actually the max!" ) ? static_cast<void> (0) : __assert_fail ("std::all_of(P.SplitTails.begin(), P.SplitTails.end(), [&](Slice *S) { return S->endOffset() <= MaxSplitSliceEndOffset; }) && \"Max split slice end offset is not actually the max!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 396, __PRETTY_FUNCTION__)); | |||||
397 | } | |||||
398 | } | |||||
399 | ||||||
400 | // If P.SI is already at the end, then we've cleared the split tail and | |||||
401 | // now have an end iterator. | |||||
402 | if (P.SI == SE) { | |||||
403 | assert(P.SplitTails.empty() && "Failed to clear the split slices!")((P.SplitTails.empty() && "Failed to clear the split slices!" ) ? static_cast<void> (0) : __assert_fail ("P.SplitTails.empty() && \"Failed to clear the split slices!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 403, __PRETTY_FUNCTION__)); | |||||
404 | return; | |||||
405 | } | |||||
406 | ||||||
407 | // If we had a non-empty partition previously, set up the state for | |||||
408 | // subsequent partitions. | |||||
409 | if (P.SI != P.SJ) { | |||||
410 | // Accumulate all the splittable slices which started in the old | |||||
411 | // partition into the split list. | |||||
412 | for (Slice &S : P) | |||||
413 | if (S.isSplittable() && S.endOffset() > P.EndOffset) { | |||||
414 | P.SplitTails.push_back(&S); | |||||
415 | MaxSplitSliceEndOffset = | |||||
416 | std::max(S.endOffset(), MaxSplitSliceEndOffset); | |||||
417 | } | |||||
418 | ||||||
419 | // Start from the end of the previous partition. | |||||
420 | P.SI = P.SJ; | |||||
421 | ||||||
422 | // If P.SI is now at the end, we at most have a tail of split slices. | |||||
423 | if (P.SI == SE) { | |||||
424 | P.BeginOffset = P.EndOffset; | |||||
425 | P.EndOffset = MaxSplitSliceEndOffset; | |||||
426 | return; | |||||
427 | } | |||||
428 | ||||||
429 | // If the we have split slices and the next slice is after a gap and is | |||||
430 | // not splittable immediately form an empty partition for the split | |||||
431 | // slices up until the next slice begins. | |||||
432 | if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset && | |||||
433 | !P.SI->isSplittable()) { | |||||
434 | P.BeginOffset = P.EndOffset; | |||||
435 | P.EndOffset = P.SI->beginOffset(); | |||||
436 | return; | |||||
437 | } | |||||
438 | } | |||||
439 | ||||||
440 | // OK, we need to consume new slices. Set the end offset based on the | |||||
441 | // current slice, and step SJ past it. The beginning offset of the | |||||
442 | // parttion is the beginning offset of the next slice unless we have | |||||
443 | // pre-existing split slices that are continuing, in which case we begin | |||||
444 | // at the prior end offset. | |||||
445 | P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset; | |||||
446 | P.EndOffset = P.SI->endOffset(); | |||||
447 | ++P.SJ; | |||||
448 | ||||||
449 | // There are two strategies to form a partition based on whether the | |||||
450 | // partition starts with an unsplittable slice or a splittable slice. | |||||
451 | if (!P.SI->isSplittable()) { | |||||
452 | // When we're forming an unsplittable region, it must always start at | |||||
453 | // the first slice and will extend through its end. | |||||
454 | assert(P.BeginOffset == P.SI->beginOffset())((P.BeginOffset == P.SI->beginOffset()) ? static_cast<void > (0) : __assert_fail ("P.BeginOffset == P.SI->beginOffset()" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 454, __PRETTY_FUNCTION__)); | |||||
455 | ||||||
456 | // Form a partition including all of the overlapping slices with this | |||||
457 | // unsplittable slice. | |||||
458 | while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { | |||||
459 | if (!P.SJ->isSplittable()) | |||||
460 | P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); | |||||
461 | ++P.SJ; | |||||
462 | } | |||||
463 | ||||||
464 | // We have a partition across a set of overlapping unsplittable | |||||
465 | // partitions. | |||||
466 | return; | |||||
467 | } | |||||
468 | ||||||
469 | // If we're starting with a splittable slice, then we need to form | |||||
470 | // a synthetic partition spanning it and any other overlapping splittable | |||||
471 | // splices. | |||||
472 | assert(P.SI->isSplittable() && "Forming a splittable partition!")((P.SI->isSplittable() && "Forming a splittable partition!" ) ? static_cast<void> (0) : __assert_fail ("P.SI->isSplittable() && \"Forming a splittable partition!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 472, __PRETTY_FUNCTION__)); | |||||
473 | ||||||
474 | // Collect all of the overlapping splittable slices. | |||||
475 | while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset && | |||||
476 | P.SJ->isSplittable()) { | |||||
477 | P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); | |||||
478 | ++P.SJ; | |||||
479 | } | |||||
480 | ||||||
481 | // Back upiP.EndOffset if we ended the span early when encountering an | |||||
482 | // unsplittable slice. This synthesizes the early end offset of | |||||
483 | // a partition spanning only splittable slices. | |||||
484 | if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { | |||||
485 | assert(!P.SJ->isSplittable())((!P.SJ->isSplittable()) ? static_cast<void> (0) : __assert_fail ("!P.SJ->isSplittable()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 485, __PRETTY_FUNCTION__)); | |||||
486 | P.EndOffset = P.SJ->beginOffset(); | |||||
487 | } | |||||
488 | } | |||||
489 | ||||||
490 | public: | |||||
/// Compare two partition iterators for equality.
///
/// Two iterators are equal when they sit at the same slice position of the
/// same slice sequence and agree on whether split-slice tails remain. The
/// tail check matters only at the end position: the true end iterator has an
/// empty tail list, while a not-yet-finished iterator may share P.SI == SE
/// but still carry tails.
bool operator==(const partition_iterator &RHS) const {
  assert(SE == RHS.SE &&((SE == RHS.SE && "End iterators don't match between compared partition iterators!" ) ? static_cast<void> (0) : __assert_fail ("SE == RHS.SE && \"End iterators don't match between compared partition iterators!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 493, __PRETTY_FUNCTION__))
         "End iterators don't match between compared partition iterators!")((SE == RHS.SE && "End iterators don't match between compared partition iterators!" ) ? static_cast<void> (0) : __assert_fail ("SE == RHS.SE && \"End iterators don't match between compared partition iterators!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 493, __PRETTY_FUNCTION__));

  // The observed positions of partitions is marked by the P.SI iterator and
  // the emptyness of the split slices. The latter is only relevant when
  // P.SI == SE, as the end iterator will additionally have an empty split
  // slices list, but the prior may have the same P.SI and a tail of split
  // slices.
  if (P.SI == RHS.P.SI &&
      P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
    // When the positions match, the rest of the partition state must be
    // identical too; these asserts catch divergent partition formation.
    assert(P.SJ == RHS.P.SJ &&((P.SJ == RHS.P.SJ && "Same set of slices formed two different sized partitions!" ) ? static_cast<void> (0) : __assert_fail ("P.SJ == RHS.P.SJ && \"Same set of slices formed two different sized partitions!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 503, __PRETTY_FUNCTION__))
           "Same set of slices formed two different sized partitions!")((P.SJ == RHS.P.SJ && "Same set of slices formed two different sized partitions!" ) ? static_cast<void> (0) : __assert_fail ("P.SJ == RHS.P.SJ && \"Same set of slices formed two different sized partitions!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 503, __PRETTY_FUNCTION__));
    assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&((P.SplitTails.size() == RHS.P.SplitTails.size() && "Same slice position with differently sized non-empty split " "slice tails!") ? static_cast<void> (0) : __assert_fail ("P.SplitTails.size() == RHS.P.SplitTails.size() && \"Same slice position with differently sized non-empty split \" \"slice tails!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 506, __PRETTY_FUNCTION__))
           "Same slice position with differently sized non-empty split "((P.SplitTails.size() == RHS.P.SplitTails.size() && "Same slice position with differently sized non-empty split " "slice tails!") ? static_cast<void> (0) : __assert_fail ("P.SplitTails.size() == RHS.P.SplitTails.size() && \"Same slice position with differently sized non-empty split \" \"slice tails!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 506, __PRETTY_FUNCTION__))
           "slice tails!")((P.SplitTails.size() == RHS.P.SplitTails.size() && "Same slice position with differently sized non-empty split " "slice tails!") ? static_cast<void> (0) : __assert_fail ("P.SplitTails.size() == RHS.P.SplitTails.size() && \"Same slice position with differently sized non-empty split \" \"slice tails!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 506, __PRETTY_FUNCTION__));
    return true;
  }
  return false;
}
511 | ||||||
512 | partition_iterator &operator++() { | |||||
513 | advance(); | |||||
514 | return *this; | |||||
515 | } | |||||
516 | ||||||
517 | Partition &operator*() { return P; } | |||||
518 | }; | |||||
519 | ||||||
520 | /// \brief A forward range over the partitions of the alloca's slices. | |||||
521 | /// | |||||
522 | /// This accesses an iterator range over the partitions of the alloca's | |||||
523 | /// slices. It computes these partitions on the fly based on the overlapping | |||||
524 | /// offsets of the slices and the ability to split them. It will visit "empty" | |||||
525 | /// partitions to cover regions of the alloca only accessed via split | |||||
526 | /// slices. | |||||
527 | iterator_range<partition_iterator> partitions() { | |||||
528 | return make_range(partition_iterator(begin(), end()), | |||||
529 | partition_iterator(end(), end())); | |||||
530 | } | |||||
531 | ||||||
532 | /// \brief Access the dead users for this alloca. | |||||
533 | ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; } | |||||
534 | ||||||
535 | /// \brief Access the dead operands referring to this alloca. | |||||
536 | /// | |||||
537 | /// These are operands which have cannot actually be used to refer to the | |||||
538 | /// alloca as they are outside its range and the user doesn't correct for | |||||
539 | /// that. These mostly consist of PHI node inputs and the like which we just | |||||
540 | /// need to replace with undef. | |||||
541 | ArrayRef<Use *> getDeadOperands() const { return DeadOperands; } | |||||
542 | ||||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug-only printing/dumping helpers for slices and their uses; compiled
// out of release builds unless LLVM_ENABLE_DUMP is defined.
void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
void printSlice(raw_ostream &OS, const_iterator I,
                StringRef Indent = " ") const;
void printUse(raw_ostream &OS, const_iterator I,
              StringRef Indent = " ") const;
void print(raw_ostream &OS) const;
void dump(const_iterator I) const;
void dump() const;
#endif
553 | ||||||
private:
template <typename DerivedT, typename RetT = void> class BuilderBase;
class SliceBuilder;
// The builder populates the private Slices/DeadUsers/DeadOperands vectors
// below while walking the alloca's uses, hence the friendship.
friend class AllocaSlices::SliceBuilder;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Handle to alloca instruction to simplify method interfaces.
AllocaInst &AI;
#endif

/// \brief The instruction responsible for this alloca not having a known set
/// of slices.
///
/// When an instruction (potentially) escapes the pointer to the alloca, we
/// store a pointer to that here and abort trying to form slices of the
/// alloca. This will be null if the alloca slices are analyzed successfully.
Instruction *PointerEscapingInstr;

/// \brief The slices of the alloca.
///
/// We store a vector of the slices formed by uses of the alloca here. This
/// vector is sorted by increasing begin offset, and then the unsplittable
/// slices before the splittable ones. See the Slice inner class for more
/// details.
SmallVector<Slice, 8> Slices;

/// \brief Instructions which will become dead if we rewrite the alloca.
///
/// Note that these are not separated by slice. This is because we expect an
/// alloca to be completely rewritten or not rewritten at all. If rewritten,
/// all these instructions can simply be removed and replaced with undef as
/// they come from outside of the allocated space.
SmallVector<Instruction *, 8> DeadUsers;

/// \brief Operands which will become dead if we rewrite the alloca.
///
/// These are operands that in their particular use can be replaced with
/// undef when we rewrite the alloca. These show up in out-of-bounds inputs
/// to PHI nodes and the like. They aren't entirely dead (there might be
/// a GEP back into the bounds using it elsewhere) and nor is the PHI, but we
/// want to swap this particular input for undef to simplify the use lists of
/// the alloca.
SmallVector<Use *, 8> DeadOperands;
};
598 | } | |||||
599 | ||||||
600 | static Value *foldSelectInst(SelectInst &SI) { | |||||
601 | // If the condition being selected on is a constant or the same value is | |||||
602 | // being selected between, fold the select. Yes this does (rarely) happen | |||||
603 | // early on. | |||||
604 | if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition())) | |||||
605 | return SI.getOperand(1 + CI->isZero()); | |||||
606 | if (SI.getOperand(1) == SI.getOperand(2)) | |||||
607 | return SI.getOperand(1); | |||||
608 | ||||||
609 | return nullptr; | |||||
610 | } | |||||
611 | ||||||
612 | /// \brief A helper that folds a PHI node or a select. | |||||
613 | static Value *foldPHINodeOrSelectInst(Instruction &I) { | |||||
614 | if (PHINode *PN = dyn_cast<PHINode>(&I)) { | |||||
615 | // If PN merges together the same value, return that value. | |||||
616 | return PN->hasConstantValue(); | |||||
617 | } | |||||
618 | return foldSelectInst(cast<SelectInst>(I)); | |||||
619 | } | |||||
620 | ||||||
/// \brief Builder for the alloca slices.
///
/// This class builds a set of alloca slices by recursively visiting the uses
/// of an alloca and making a slice for each load and store at each offset.
class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
  friend class PtrUseVisitor<SliceBuilder>;
  friend class InstVisitor<SliceBuilder>;
  typedef PtrUseVisitor<SliceBuilder> Base;

  // Size in bytes of the alloca being sliced; used to cull and clamp uses.
  const uint64_t AllocSize;
  // The slice set being populated.
  AllocaSlices &AS;

  // Maps a mem-transfer intrinsic to the index of the slice created on its
  // first visit so the second visit (via the other operand) can find it.
  SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
  // Memoized maximum load/store size reachable through a PHI or select.
  SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;

  /// \brief Set to de-duplicate dead instructions found in the use walk.
  SmallPtrSet<Instruction *, 4> VisitedDeadInsts;

public:
  SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
      : PtrUseVisitor<SliceBuilder>(DL),
        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), AS(AS) {}
643 | ||||||
644 | private: | |||||
645 | void markAsDead(Instruction &I) { | |||||
646 | if (VisitedDeadInsts.insert(&I).second) | |||||
647 | AS.DeadUsers.push_back(&I); | |||||
648 | } | |||||
649 | ||||||
/// \brief Record a slice covering [Offset, Offset + Size) for use \p I.
///
/// Zero-sized uses and uses starting at or past the end of the alloca are
/// recorded as dead instead of sliced; uses extending past the end of the
/// alloca are clamped to it.
void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
               bool IsSplittable = false) {
  // Completely skip uses which have a zero size or start either before or
  // past the end of the allocation.
  if (Size == 0 || Offset.uge(AllocSize)) {
    DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offsetdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset << " which has zero size or starts outside of the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << I << "\n"; } } while (0)
                 << " which has zero size or starts outside of the "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset << " which has zero size or starts outside of the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << I << "\n"; } } while (0)
                 << AllocSize << " byte alloca:\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset << " which has zero size or starts outside of the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << I << "\n"; } } while (0)
                 << " alloca: " << AS.AI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset << " which has zero size or starts outside of the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << I << "\n"; } } while (0)
                 << " use: " << I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset << " which has zero size or starts outside of the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << I << "\n"; } } while (0);
    return markAsDead(I);
  }

  uint64_t BeginOffset = Offset.getZExtValue();
  uint64_t EndOffset = BeginOffset + Size;

  // Clamp the end offset to the end of the allocation. Note that this is
  // formulated to handle even the case where "BeginOffset + Size" overflows.
  // This may appear superficially to be something we could ignore entirely,
  // but that is not so! There may be widened loads or PHI-node uses where
  // some instructions are dead but not others. We can't completely ignore
  // them, and so have to record at least the information here.
  assert(AllocSize >= BeginOffset)((AllocSize >= BeginOffset) ? static_cast<void> (0) : __assert_fail ("AllocSize >= BeginOffset", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 672, __PRETTY_FUNCTION__)); // Established above.
  if (Size > AllocSize - BeginOffset) {
    DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offsetdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset << " to remain within the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << I << "\n"; } } while (0)
                 << " to remain within the " << AllocSize << " byte alloca:\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset << " to remain within the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << I << "\n"; } } while (0)
                 << " alloca: " << AS.AI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset << " to remain within the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << I << "\n"; } } while (0)
                 << " use: " << I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset << " to remain within the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << I << "\n"; } } while (0);
    EndOffset = AllocSize;
  }

  AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
}
683 | ||||||
684 | void visitBitCastInst(BitCastInst &BC) { | |||||
685 | if (BC.use_empty()) | |||||
686 | return markAsDead(BC); | |||||
687 | ||||||
688 | return Base::visitBitCastInst(BC); | |||||
689 | } | |||||
690 | ||||||
/// \brief Visit a GEP: dead when unused; under SROAStrictInbounds, culled if
/// a statically computable offset leaves the allocation; otherwise handled
/// by the base pointer-use visitor.
void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  if (GEPI.use_empty())
    return markAsDead(GEPI);

  if (SROAStrictInbounds && GEPI.isInBounds()) {
    // FIXME: This is a manually un-factored variant of the basic code inside
    // of GEPs with checking of the inbounds invariant specified in the
    // langref in a very strict sense. If we ever want to enable
    // SROAStrictInbounds, this code should be factored cleanly into
    // PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
    // by writing out the code here where we have the underlying allocation
    // size readily available.
    APInt GEPOffset = Offset;
    const DataLayout &DL = GEPI.getModule()->getDataLayout();
    for (gep_type_iterator GTI = gep_type_begin(GEPI),
                           GTE = gep_type_end(GEPI);
         GTI != GTE; ++GTI) {
      // Only constant indices permit a static offset computation; stop at
      // the first non-constant one.
      ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
      if (!OpC)
        break;

      // Handle a struct index, which adds its field offset to the pointer.
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        unsigned ElementIdx = OpC->getZExtValue();
        const StructLayout *SL = DL.getStructLayout(STy);
        GEPOffset +=
            APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
      } else {
        // For array or vector indices, scale the index by the size of the
        // type.
        APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
        GEPOffset += Index * APInt(Offset.getBitWidth(),
                                   DL.getTypeAllocSize(GTI.getIndexedType()));
      }

      // If this index has computed an intermediate pointer which is not
      // inbounds, then the result of the GEP is a poison value and we can
      // delete it and all uses.
      if (GEPOffset.ugt(AllocSize))
        return markAsDead(GEPI);
    }
  }

  return Base::visitGetElementPtrInst(GEPI);
}
736 | ||||||
737 | void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset, | |||||
738 | uint64_t Size, bool IsVolatile) { | |||||
739 | // We allow splitting of non-volatile loads and stores where the type is an | |||||
740 | // integer type. These may be used to implement 'memcpy' or other "transfer | |||||
741 | // of bits" patterns. | |||||
742 | bool IsSplittable = Ty->isIntegerTy() && !IsVolatile; | |||||
743 | ||||||
744 | insertUse(I, Offset, Size, IsSplittable); | |||||
745 | } | |||||
746 | ||||||
/// \brief Visit a load: record a slice for the loaded range, or abort the
/// walk when the pointer's offset is not statically known.
void visitLoadInst(LoadInst &LI) {
  assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&(((!LI.isSimple() || LI.getType()->isSingleValueType()) && "All simple FCA loads should have been pre-split") ? static_cast <void> (0) : __assert_fail ("(!LI.isSimple() || LI.getType()->isSingleValueType()) && \"All simple FCA loads should have been pre-split\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 749, __PRETTY_FUNCTION__))
         "All simple FCA loads should have been pre-split")(((!LI.isSimple() || LI.getType()->isSingleValueType()) && "All simple FCA loads should have been pre-split") ? static_cast <void> (0) : __assert_fail ("(!LI.isSimple() || LI.getType()->isSingleValueType()) && \"All simple FCA loads should have been pre-split\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 749, __PRETTY_FUNCTION__));

  if (!IsOffsetKnown)
    return PI.setAborted(&LI);

  const DataLayout &DL = LI.getModule()->getDataLayout();
  uint64_t Size = DL.getTypeStoreSize(LI.getType());
  return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
758 | ||||||
/// \brief Visit a store: record a slice for the stored range. Storing the
/// alloca pointer itself escapes it; statically out-of-bounds stores are UB
/// and recorded as dead.
void visitStoreInst(StoreInst &SI) {
  Value *ValOp = SI.getValueOperand();
  // If the value being stored is the pointer we are visiting, the pointer
  // escapes through memory.
  if (ValOp == *U)
    return PI.setEscapedAndAborted(&SI);
  if (!IsOffsetKnown)
    return PI.setAborted(&SI);

  const DataLayout &DL = SI.getModule()->getDataLayout();
  uint64_t Size = DL.getTypeStoreSize(ValOp->getType());

  // If this memory access can be shown to *statically* extend outside the
  // bounds of the allocation, its behavior is undefined, so simply
  // ignore it. Note that this is more strict than the generic clamping
  // behavior of insertUse. We also try to handle cases which might run the
  // risk of overflow.
  // FIXME: We should instead consider the pointer to have escaped if this
  // function is being instrumented for addressing bugs or race conditions.
  if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
    DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offsetdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset << " which extends past the end of the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << SI << "\n"; } } while (0)
                 << " which extends past the end of the " << AllocSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset << " which extends past the end of the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << SI << "\n"; } } while (0)
                 << " byte alloca:\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset << " which extends past the end of the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << SI << "\n"; } } while (0)
                 << " alloca: " << AS.AI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset << " which extends past the end of the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << SI << "\n"; } } while (0)
                 << " use: " << SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset << " which extends past the end of the " << AllocSize << " byte alloca:\n" << " alloca: " << AS.AI << "\n" << " use: " << SI << "\n"; } } while (0);
    return markAsDead(SI);
  }

  assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&(((!SI.isSimple() || ValOp->getType()->isSingleValueType ()) && "All simple FCA stores should have been pre-split" ) ? static_cast<void> (0) : __assert_fail ("(!SI.isSimple() || ValOp->getType()->isSingleValueType()) && \"All simple FCA stores should have been pre-split\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 786, __PRETTY_FUNCTION__))
         "All simple FCA stores should have been pre-split")(((!SI.isSimple() || ValOp->getType()->isSingleValueType ()) && "All simple FCA stores should have been pre-split" ) ? static_cast<void> (0) : __assert_fail ("(!SI.isSimple() || ValOp->getType()->isSingleValueType()) && \"All simple FCA stores should have been pre-split\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 786, __PRETTY_FUNCTION__));
  handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
}
789 | ||||||
/// \brief Visit a memset: dead when zero-length or entirely out of bounds;
/// otherwise record a slice, splittable only when the length is a constant.
void visitMemSetInst(MemSetInst &II) {
  assert(II.getRawDest() == *U && "Pointer use is not the destination?")((II.getRawDest() == *U && "Pointer use is not the destination?" ) ? static_cast<void> (0) : __assert_fail ("II.getRawDest() == *U && \"Pointer use is not the destination?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 791, __PRETTY_FUNCTION__));
  ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
  if ((Length && Length->getValue() == 0) ||
      (IsOffsetKnown && Offset.uge(AllocSize)))
    // Zero-length mem transfer intrinsics can be ignored entirely.
    return markAsDead(II);

  if (!IsOffsetKnown)
    return PI.setAborted(&II);

  // A variable-length memset implicitly runs to the end of the alloca and
  // cannot be split.
  insertUse(II, Offset, Length ? Length->getLimitedValue()
                               : AllocSize - Offset.getLimitedValue(),
            (bool)Length);
}
805 | ||||||
/// \brief Visit a memcpy/memmove.
///
/// This can be reached twice for one intrinsic -- once per pointer operand
/// -- when both the source and the destination alias this alloca, so the
/// MemTransferSliceMap ties the two visits together.
void visitMemTransferInst(MemTransferInst &II) {
  ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
  if (Length && Length->getValue() == 0)
    // Zero-length mem transfer intrinsics can be ignored entirely.
    return markAsDead(II);

  // Because we can visit these intrinsics twice, also check to see if the
  // first time marked this instruction as dead. If so, skip it.
  if (VisitedDeadInsts.count(&II))
    return;

  if (!IsOffsetKnown)
    return PI.setAborted(&II);

  // This side of the transfer is completely out-of-bounds, and so we can
  // nuke the entire transfer. However, we also need to nuke the other side
  // if already added to our partitions.
  // FIXME: Yet another place we really should bypass this when
  // instrumenting for ASan.
  if (Offset.uge(AllocSize)) {
    SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
        MemTransferSliceMap.find(&II);
    if (MTPI != MemTransferSliceMap.end())
      AS.Slices[MTPI->second].kill();
    return markAsDead(II);
  }

  uint64_t RawOffset = Offset.getLimitedValue();
  uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;

  // Check for the special case where the same exact value is used for both
  // source and dest.
  if (*U == II.getRawDest() && *U == II.getRawSource()) {
    // For non-volatile transfers this is a no-op.
    if (!II.isVolatile())
      return markAsDead(II);

    return insertUse(II, Offset, Size, /*IsSplittable=*/false);
  }

  // If we have seen both source and destination for a mem transfer, then
  // they both point to the same alloca.
  bool Inserted;
  SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
  std::tie(MTPI, Inserted) =
      MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
  unsigned PrevIdx = MTPI->second;
  if (!Inserted) {
    // Second visit: the slice made on the first visit is at PrevIdx.
    Slice &PrevP = AS.Slices[PrevIdx];

    // Check if the begin offsets match and this is a non-volatile transfer.
    // In that case, we can completely elide the transfer.
    if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
      PrevP.kill();
      return markAsDead(II);
    }

    // Otherwise we have an offset transfer within the same alloca. We can't
    // split those.
    PrevP.makeUnsplittable();
  }

  // Insert the use now that we've fixed up the splittable nature.
  insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);

  // Check that we ended up with a valid index in the map.
  assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&((AS.Slices[PrevIdx].getUse()->getUser() == &II && "Map index doesn't point back to a slice with this user.") ? static_cast<void> (0) : __assert_fail ("AS.Slices[PrevIdx].getUse()->getUser() == &II && \"Map index doesn't point back to a slice with this user.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 873, __PRETTY_FUNCTION__))
         "Map index doesn't point back to a slice with this user.")((AS.Slices[PrevIdx].getUse()->getUser() == &II && "Map index doesn't point back to a slice with this user.") ? static_cast<void> (0) : __assert_fail ("AS.Slices[PrevIdx].getUse()->getUser() == &II && \"Map index doesn't point back to a slice with this user.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 873, __PRETTY_FUNCTION__));
}
875 | ||||||
876 | // Disable SRoA for any intrinsics except for lifetime invariants. | |||||
877 | // FIXME: What about debug intrinsics? This matches old behavior, but | |||||
878 | // doesn't make sense. | |||||
879 | void visitIntrinsicInst(IntrinsicInst &II) { | |||||
880 | if (!IsOffsetKnown) | |||||
881 | return PI.setAborted(&II); | |||||
882 | ||||||
883 | if (II.getIntrinsicID() == Intrinsic::lifetime_start || | |||||
884 | II.getIntrinsicID() == Intrinsic::lifetime_end) { | |||||
885 | ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0)); | |||||
886 | uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(), | |||||
887 | Length->getLimitedValue()); | |||||
888 | insertUse(II, Offset, Size, true); | |||||
889 | return; | |||||
890 | } | |||||
891 | ||||||
892 | Base::visitIntrinsicInst(II); | |||||
893 | } | |||||
894 | ||||||
895 | Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) { | |||||
896 | // We consider any PHI or select that results in a direct load or store of | |||||
897 | // the same offset to be a viable use for slicing purposes. These uses | |||||
898 | // are considered unsplittable and the size is the maximum loaded or stored | |||||
899 | // size. | |||||
900 | SmallPtrSet<Instruction *, 4> Visited; | |||||
901 | SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses; | |||||
902 | Visited.insert(Root); | |||||
903 | Uses.push_back(std::make_pair(cast<Instruction>(*U), Root)); | |||||
904 | const DataLayout &DL = Root->getModule()->getDataLayout(); | |||||
905 | // If there are no loads or stores, the access is dead. We mark that as | |||||
906 | // a size zero access. | |||||
907 | Size = 0; | |||||
908 | do { | |||||
909 | Instruction *I, *UsedI; | |||||
910 | std::tie(UsedI, I) = Uses.pop_back_val(); | |||||
911 | ||||||
912 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) { | |||||
913 | Size = std::max(Size, DL.getTypeStoreSize(LI->getType())); | |||||
914 | continue; | |||||
915 | } | |||||
916 | if (StoreInst *SI = dyn_cast<StoreInst>(I)) { | |||||
917 | Value *Op = SI->getOperand(0); | |||||
918 | if (Op == UsedI) | |||||
919 | return SI; | |||||
920 | Size = std::max(Size, DL.getTypeStoreSize(Op->getType())); | |||||
921 | continue; | |||||
922 | } | |||||
923 | ||||||
924 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { | |||||
925 | if (!GEP->hasAllZeroIndices()) | |||||
926 | return GEP; | |||||
927 | } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) && | |||||
928 | !isa<SelectInst>(I)) { | |||||
929 | return I; | |||||
930 | } | |||||
931 | ||||||
932 | for (User *U : I->users()) | |||||
933 | if (Visited.insert(cast<Instruction>(U)).second) | |||||
934 | Uses.push_back(std::make_pair(I, cast<Instruction>(U))); | |||||
935 | } while (!Uses.empty()); | |||||
936 | ||||||
937 | return nullptr; | |||||
938 | } | |||||
939 | ||||||
  // Shared visitor for PHI nodes and selects: both merely merge/forward
  // pointer values, so the slice-building logic is identical for the two.
  void visitPHINodeOrSelectInst(Instruction &I) {
    assert(isa<PHINode>(I) || isa<SelectInst>(I));
    // A PHI/select with no uses contributes no accesses; record it as dead.
    if (I.use_empty())
      return markAsDead(I);

    // TODO: We could use SimplifyInstruction here to fold PHINodes and
    // SelectInsts. However, doing so requires to change the current
    // dead-operand-tracking mechanism. For instance, suppose neither loading
    // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
    // trap either. However, if we simply replace %U with undef using the
    // current dead-operand-tracking mechanism, "load (select undef, undef,
    // %other)" may trap because the select may return the first operand
    // "undef".
    if (Value *Result = foldPHINodeOrSelectInst(I)) {
      if (Result == *U)
        // If the result of the constant fold will be the pointer, recurse
        // through the PHI/select as if we had RAUW'ed it.
        enqueueUsers(I);
      else
        // Otherwise the operand to the PHI/select is dead, and we can replace
        // it with undef.
        AS.DeadOperands.push_back(U);

      return;
    }

    if (!IsOffsetKnown)
      return PI.setAborted(&I);

    // See if we already have computed info on this node.
    // NOTE(review): `Size` is a reference into the PHIOrSelectSizes DenseMap;
    // hasUnsafePHIOrSelectUse writes through this reference but must not
    // insert into the map, or the reference would dangle -- confirm if that
    // helper ever changes.
    uint64_t &Size = PHIOrSelectSizes[&I];
    if (!Size) {
      // This is a new PHI/Select, check for an unsafe use of it.
      if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
        return PI.setAborted(UnsafeI);
    }

    // For PHI and select operands outside the alloca, we can't nuke the entire
    // phi or select -- the other side might still be relevant, so we special
    // case them here and use a separate structure to track the operands
    // themselves which should be replaced with undef.
    // FIXME: This should instead be escaped in the event we're instrumenting
    // for address sanitization.
    if (Offset.uge(AllocSize)) {
      AS.DeadOperands.push_back(U);
      return;
    }

    insertUse(I, Offset, Size);
  }
990 | ||||||
991 | void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); } | |||||
992 | ||||||
993 | void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); } | |||||
994 | ||||||
995 | /// \brief Disable SROA entirely if there are unhandled users of the alloca. | |||||
996 | void visitInstruction(Instruction &I) { PI.setAborted(&I); } | |||||
997 | }; | |||||
998 | ||||||
// Build the slice list for AI by walking all of its (transitive) uses with
// a SliceBuilder, then pruning dead slices and sorting the remainder.
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
    :
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      AI(AI),
#endif
      PointerEscapingInstr(nullptr) {
  SliceBuilder PB(DL, AI, *this);
  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    // FIXME: We should sink the escape vs. abort info into the caller nicely,
    // possibly by just storing the PtrInfo in the AllocaSlices.
    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");
    return;
  }

  // Drop the slices the builder marked dead before we sort.
  Slices.erase(std::remove_if(Slices.begin(), Slices.end(),
                              [](const Slice &S) {
                                return S.isDead();
                              }),
               Slices.end());

#if __cplusplus >= 201103L && !defined(NDEBUG)
  // Debug-only: randomly shuffle the slices to shake out any hidden
  // dependence on their construction order before sorting.
  if (SROARandomShuffleSlices) {
    std::mt19937 MT(static_cast<unsigned>(sys::TimeValue::now().msec()));
    std::shuffle(Slices.begin(), Slices.end(), MT);
  }
#endif

  // Sort the uses. This arranges for the offsets to be in ascending order,
  // and the sizes to be in descending order.
  std::sort(Slices.begin(), Slices.end());
}
1033 | ||||||
1034 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||||
1035 | ||||||
// Print one slice: its offset-range header line followed by the use that
// created it.
void AllocaSlices::print(raw_ostream &OS, const_iterator I,
                         StringRef Indent) const {
  printSlice(OS, I, Indent);
  OS << "\n";
  printUse(OS, I, Indent);
}
1042 | ||||||
1043 | void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I, | |||||
1044 | StringRef Indent) const { | |||||
1045 | OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")" | |||||
1046 | << " slice #" << (I - begin()) | |||||
1047 | << (I->isSplittable() ? " (splittable)" : ""); | |||||
1048 | } | |||||
1049 | ||||||
// Print the instruction that uses this slice of the alloca.
void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
                            StringRef Indent) const {
  OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
}
1054 | ||||||
// Print every slice of the alloca, or the escaping/aborting instruction if
// slice construction failed.
void AllocaSlices::print(raw_ostream &OS) const {
  if (PointerEscapingInstr) {
    OS << "Can't analyze slices for alloca: " << AI << "\n"
       << " A pointer to this alloca escaped by:\n"
       << " " << *PointerEscapingInstr << "\n";
    return;
  }

  OS << "Slices of alloca: " << AI << "\n";
  for (const_iterator I = begin(), E = end(); I != E; ++I)
    print(OS, I);
}
1067 | ||||||
// Debug helper: dump a single slice to the debug stream.
LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
  print(dbgs(), I);
}
// Debug helper: dump every slice of the alloca to the debug stream.
LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1072 | ||||||
1073 | #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||||
1074 | ||||||
1075 | namespace { | |||||
1076 | /// \brief Implementation of LoadAndStorePromoter for promoting allocas. | |||||
1077 | /// | |||||
1078 | /// This subclass of LoadAndStorePromoter adds overrides to handle promoting | |||||
1079 | /// the loads and stores of an alloca instruction, as well as updating its | |||||
1080 | /// debug information. This is used when a domtree is unavailable and thus | |||||
1081 | /// mem2reg in its full form can't be used to handle promotion of allocas to | |||||
1082 | /// scalar values. | |||||
class AllocaPromoter : public LoadAndStorePromoter {
  AllocaInst &AI; // The alloca whose loads and stores are being promoted.
  DIBuilder &DIB; // Used to re-emit llvm.dbg.value intrinsics after rewriting.

  // Debug intrinsics that refer to the alloca; collected in run() and erased
  // from the function once promotion completes.
  SmallVector<DbgDeclareInst *, 4> DDIs;
  SmallVector<DbgValueInst *, 4> DVIs;

public:
  AllocaPromoter(ArrayRef<const Instruction *> Insts,
                 SSAUpdater &S,
                 AllocaInst &AI, DIBuilder &DIB)
      : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}

  // Promote the given loads and stores, keeping debug info consistent.
  void run(const SmallVectorImpl<Instruction *> &Insts) {
    // Retain the debug information attached to the alloca for use when
    // rewriting loads and stores.
    if (auto *L = LocalAsMetadata::getIfExists(&AI)) {
      if (auto *DINode = MetadataAsValue::getIfExists(AI.getContext(), L)) {
        for (User *U : DINode->users())
          if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
            DDIs.push_back(DDI);
          else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
            DVIs.push_back(DVI);
      }
    }

    LoadAndStorePromoter::run(Insts);

    // While we have the debug information, clear it off of the alloca. The
    // caller takes care of deleting the alloca.
    while (!DDIs.empty())
      DDIs.pop_back_val()->eraseFromParent();
    while (!DVIs.empty())
      DVIs.pop_back_val()->eraseFromParent();
  }

  // Returns true if I loads from or stores to AI, possibly through a chain
  // of bitcasts and GEPs.
  bool
  isInstInList(Instruction *I,
               const SmallVectorImpl<Instruction *> &Insts) const override {
    Value *Ptr;
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      Ptr = LI->getOperand(0);
    else
      Ptr = cast<StoreInst>(I)->getPointerOperand();

    // Only used to detect cycles, which will be rare and quickly found as
    // we're walking up a chain of defs rather than down through uses.
    SmallPtrSet<Value *, 4> Visited;

    do {
      if (Ptr == &AI)
        return true;

      // Step through address computations toward the base pointer.
      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
        Ptr = BCI->getOperand(0);
      else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
        Ptr = GEPI->getPointerOperand();
      else
        return false;

    } while (Visited.insert(Ptr).second);

    // insert() returned false: we revisited a pointer, i.e. hit a cycle
    // without ever reaching AI.
    return false;
  }

  // Re-emit debug value intrinsics describing Inst's effect on the value
  // being promoted.
  void updateDebugInfo(Instruction *Inst) const override {
    for (DbgDeclareInst *DDI : DDIs)
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
      else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
    for (DbgValueInst *DVI : DVIs) {
      Value *Arg = nullptr;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        // If an argument is zero extended then use argument directly. The ZExt
        // may be zapped by an optimization pass in future.
        if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
          Arg = dyn_cast<Argument>(ZExt->getOperand(0));
        else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
          Arg = dyn_cast<Argument>(SExt->getOperand(0));
        if (!Arg)
          Arg = SI->getValueOperand();
      } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        Arg = LI->getPointerOperand();
      } else {
        continue;
      }
      DIB.insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
                                  DVI->getExpression(), DVI->getDebugLoc(),
                                  Inst);
    }
  }
};
1176 | } // end anon namespace | |||||
1177 | ||||||
1178 | namespace { | |||||
1179 | /// \brief An optimization pass providing Scalar Replacement of Aggregates. | |||||
1180 | /// | |||||
1181 | /// This pass takes allocations which can be completely analyzed (that is, they | |||||
1182 | /// don't escape) and tries to turn them into scalar SSA values. There are | |||||
1183 | /// a few steps to this process. | |||||
1184 | /// | |||||
1185 | /// 1) It takes allocations of aggregates and analyzes the ways in which they | |||||
1186 | /// are used to try to split them into smaller allocations, ideally of | |||||
1187 | /// a single scalar data type. It will split up memcpy and memset accesses | |||||
1188 | /// as necessary and try to isolate individual scalar accesses. | |||||
1189 | /// 2) It will transform accesses into forms which are suitable for SSA value | |||||
1190 | /// promotion. This can be replacing a memset with a scalar store of an | |||||
1191 | /// integer value, or it can involve speculating operations on a PHI or | |||||
1192 | /// select to be a PHI or select of the results. | |||||
1193 | /// 3) Finally, this will try to detect a pattern of accesses which map cleanly | |||||
1194 | /// onto insert and extract operations on a vector value, and convert them to | |||||
1195 | /// this form. By doing so, it will enable promotion of vector aggregates to | |||||
1196 | /// SSA vector values. | |||||
1197 | class SROA : public FunctionPass { | |||||
1198 | const bool RequiresDomTree; | |||||
1199 | ||||||
1200 | LLVMContext *C; | |||||
1201 | DominatorTree *DT; | |||||
1202 | AssumptionCache *AC; | |||||
1203 | ||||||
1204 | /// \brief Worklist of alloca instructions to simplify. | |||||
1205 | /// | |||||
1206 | /// Each alloca in the function is added to this. Each new alloca formed gets | |||||
1207 | /// added to it as well to recursively simplify unless that alloca can be | |||||
1208 | /// directly promoted. Finally, each time we rewrite a use of an alloca other | |||||
1209 | /// the one being actively rewritten, we add it back onto the list if not | |||||
1210 | /// already present to ensure it is re-visited. | |||||
1211 | SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> Worklist; | |||||
1212 | ||||||
1213 | /// \brief A collection of instructions to delete. | |||||
1214 | /// We try to batch deletions to simplify code and make things a bit more | |||||
1215 | /// efficient. | |||||
1216 | SetVector<Instruction *, SmallVector<Instruction *, 8>> DeadInsts; | |||||
1217 | ||||||
1218 | /// \brief Post-promotion worklist. | |||||
1219 | /// | |||||
1220 | /// Sometimes we discover an alloca which has a high probability of becoming | |||||
1221 | /// viable for SROA after a round of promotion takes place. In those cases, | |||||
1222 | /// the alloca is enqueued here for re-processing. | |||||
1223 | /// | |||||
1224 | /// Note that we have to be very careful to clear allocas out of this list in | |||||
1225 | /// the event they are deleted. | |||||
1226 | SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> PostPromotionWorklist; | |||||
1227 | ||||||
1228 | /// \brief A collection of alloca instructions we can directly promote. | |||||
1229 | std::vector<AllocaInst *> PromotableAllocas; | |||||
1230 | ||||||
1231 | /// \brief A worklist of PHIs to speculate prior to promoting allocas. | |||||
1232 | /// | |||||
1233 | /// All of these PHIs have been checked for the safety of speculation and by | |||||
1234 | /// being speculated will allow promoting allocas currently in the promotable | |||||
1235 | /// queue. | |||||
1236 | SetVector<PHINode *, SmallVector<PHINode *, 2>> SpeculatablePHIs; | |||||
1237 | ||||||
1238 | /// \brief A worklist of select instructions to speculate prior to promoting | |||||
1239 | /// allocas. | |||||
1240 | /// | |||||
1241 | /// All of these select instructions have been checked for the safety of | |||||
1242 | /// speculation and by being speculated will allow promoting allocas | |||||
1243 | /// currently in the promotable queue. | |||||
1244 | SetVector<SelectInst *, SmallVector<SelectInst *, 2>> SpeculatableSelects; | |||||
1245 | ||||||
1246 | public: | |||||
1247 | SROA(bool RequiresDomTree = true) | |||||
1248 | : FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr), | |||||
1249 | DT(nullptr) { | |||||
1250 | initializeSROAPass(*PassRegistry::getPassRegistry()); | |||||
1251 | } | |||||
1252 | bool runOnFunction(Function &F) override; | |||||
1253 | void getAnalysisUsage(AnalysisUsage &AU) const override; | |||||
1254 | ||||||
1255 | const char *getPassName() const override { return "SROA"; } | |||||
1256 | static char ID; | |||||
1257 | ||||||
1258 | private: | |||||
1259 | friend class PHIOrSelectSpeculator; | |||||
1260 | friend class AllocaSliceRewriter; | |||||
1261 | ||||||
1262 | bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS); | |||||
1263 | AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, | |||||
1264 | AllocaSlices::Partition &P); | |||||
1265 | bool splitAlloca(AllocaInst &AI, AllocaSlices &AS); | |||||
1266 | bool runOnAlloca(AllocaInst &AI); | |||||
1267 | void clobberUse(Use &U); | |||||
1268 | void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas); | |||||
1269 | bool promoteAllocas(Function &F); | |||||
1270 | }; | |||||
1271 | } | |||||
1272 | ||||||
// Pass identification token; its address (not its value) identifies the pass.
char SROA::ID = 0;
1274 | ||||||
1275 | FunctionPass *llvm::createSROAPass(bool RequiresDomTree) { | |||||
1276 | return new SROA(RequiresDomTree); | |||||
1277 | } | |||||
1278 | ||||||
// Register the pass and the analyses it depends on with the PassRegistry.
INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
                    false)
1285 | ||||||
1286 | /// Walk the range of a partitioning looking for a common type to cover this | |||||
1287 | /// sequence of slices. | |||||
1288 | static Type *findCommonType(AllocaSlices::const_iterator B, | |||||
1289 | AllocaSlices::const_iterator E, | |||||
1290 | uint64_t EndOffset) { | |||||
1291 | Type *Ty = nullptr; | |||||
1292 | bool TyIsCommon = true; | |||||
1293 | IntegerType *ITy = nullptr; | |||||
1294 | ||||||
1295 | // Note that we need to look at *every* alloca slice's Use to ensure we | |||||
1296 | // always get consistent results regardless of the order of slices. | |||||
1297 | for (AllocaSlices::const_iterator I = B; I != E; ++I) { | |||||
1298 | Use *U = I->getUse(); | |||||
1299 | if (isa<IntrinsicInst>(*U->getUser())) | |||||
1300 | continue; | |||||
1301 | if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset) | |||||
1302 | continue; | |||||
1303 | ||||||
1304 | Type *UserTy = nullptr; | |||||
1305 | if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { | |||||
1306 | UserTy = LI->getType(); | |||||
1307 | } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { | |||||
1308 | UserTy = SI->getValueOperand()->getType(); | |||||
1309 | } | |||||
1310 | ||||||
1311 | if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) { | |||||
1312 | // If the type is larger than the partition, skip it. We only encounter | |||||
1313 | // this for split integer operations where we want to use the type of the | |||||
1314 | // entity causing the split. Also skip if the type is not a byte width | |||||
1315 | // multiple. | |||||
1316 | if (UserITy->getBitWidth() % 8 != 0 || | |||||
1317 | UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset())) | |||||
1318 | continue; | |||||
1319 | ||||||
1320 | // Track the largest bitwidth integer type used in this way in case there | |||||
1321 | // is no common type. | |||||
1322 | if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth()) | |||||
1323 | ITy = UserITy; | |||||
1324 | } | |||||
1325 | ||||||
1326 | // To avoid depending on the order of slices, Ty and TyIsCommon must not | |||||
1327 | // depend on types skipped above. | |||||
1328 | if (!UserTy || (Ty && Ty != UserTy)) | |||||
1329 | TyIsCommon = false; // Give up on anything but an iN type. | |||||
1330 | else | |||||
1331 | Ty = UserTy; | |||||
1332 | } | |||||
1333 | ||||||
1334 | return TyIsCommon ? Ty : ITy; | |||||
1335 | } | |||||
1336 | ||||||
1337 | /// PHI instructions that use an alloca and are subsequently loaded can be | |||||
1338 | /// rewritten to load both input pointers in the pred blocks and then PHI the | |||||
1339 | /// results, allowing the load of the alloca to be promoted. | |||||
1340 | /// From this: | |||||
1341 | /// %P2 = phi [i32* %Alloca, i32* %Other] | |||||
1342 | /// %V = load i32* %P2 | |||||
1343 | /// to: | |||||
1344 | /// %V1 = load i32* %Alloca -> will be mem2reg'd | |||||
1345 | /// ... | |||||
1346 | /// %V2 = load i32* %Other | |||||
1347 | /// ... | |||||
1348 | /// %V = phi [i32 %V1, i32 %V2] | |||||
1349 | /// | |||||
1350 | /// We can do this to a select if its only uses are loads and if the operands | |||||
1351 | /// to the select can be loaded unconditionally. | |||||
1352 | /// | |||||
1353 | /// FIXME: This should be hoisted into a generic utility, likely in | |||||
1354 | /// Transforms/Util/Local.h | |||||
static bool isSafePHIToSpeculate(PHINode &PN) {
  // For now, we can only do this promotion if the load is in the same block
  // as the PHI, and if there are no stores between the phi and load.
  // TODO: Allow recursive phi users.
  // TODO: Allow stores.
  BasicBlock *BB = PN.getParent();
  unsigned MaxAlign = 0;
  bool HaveLoad = false;
  for (User *U : PN.users()) {
    // Every user of the PHI must be a simple (non-volatile, non-atomic) load.
    LoadInst *LI = dyn_cast<LoadInst>(U);
    if (!LI || !LI->isSimple())
      return false;

    // For now we only allow loads in the same block as the PHI. This is
    // a common case that happens when instcombine merges two loads through
    // a PHI.
    if (LI->getParent() != BB)
      return false;

    // Ensure that there are no instructions between the PHI and the load that
    // could store.
    for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
      if (BBI->mayWriteToMemory())
        return false;

    // Remember the strictest alignment any load requires; the speculated
    // loads must be safe at this alignment.
    MaxAlign = std::max(MaxAlign, LI->getAlignment());
    HaveLoad = true;
  }

  if (!HaveLoad)
    return false;

  const DataLayout &DL = PN.getModule()->getDataLayout();

  // We can only transform this if it is safe to push the loads into the
  // predecessor blocks. The only thing to watch out for is that we can't put
  // a possibly trapping load in the predecessor if it is a critical edge.
  for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
    TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
    Value *InVal = PN.getIncomingValue(Idx);

    // If the value is produced by the terminator of the predecessor (an
    // invoke) or it has side-effects, there is no valid place to put a load
    // in the predecessor.
    if (TI == InVal || TI->mayHaveSideEffects())
      return false;

    // If the predecessor has a single successor, then the edge isn't
    // critical.
    if (TI->getNumSuccessors() == 1)
      continue;

    // If this pointer is always safe to load, or if we can prove that there
    // is already a load in the block, then we can move the load to the pred
    // block.
    if (isDereferenceablePointer(InVal, DL) ||
        isSafeToLoadUnconditionally(InVal, TI, MaxAlign))
      continue;

    return false;
  }

  return true;
}
1419 | ||||||
// Rewrite "load (phi of pointers)" into "phi of loads in the predecessors".
// Callers must have validated the transform with isSafePHIToSpeculate.
static void speculatePHINodeLoads(PHINode &PN) {
  DEBUG(dbgs() << " original: " << PN << "\n");

  Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
  IRBuilderTy PHIBuilder(&PN);
  PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
                                        PN.getName() + ".sroa.speculated");

  // Get the AA tags and alignment to use from one of the loads. It doesn't
  // matter which one we get and if any differ.
  LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());

  AAMDNodes AATags;
  SomeLoad->getAAMetadata(AATags);
  unsigned Align = SomeLoad->getAlignment();

  // Rewrite all loads of the PN to use the new PHI.
  while (!PN.use_empty()) {
    LoadInst *LI = cast<LoadInst>(PN.user_back());
    LI->replaceAllUsesWith(NewPN);
    LI->eraseFromParent();
  }

  // Inject loads into all of the pred blocks.
  for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
    BasicBlock *Pred = PN.getIncomingBlock(Idx);
    TerminatorInst *TI = Pred->getTerminator();
    Value *InVal = PN.getIncomingValue(Idx);
    IRBuilderTy PredBuilder(TI);

    LoadInst *Load = PredBuilder.CreateLoad(
        InVal, (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
    ++NumLoadsSpeculated;
    Load->setAlignment(Align);
    // Carry the AA metadata over so alias analysis still applies to the
    // speculated loads.
    if (AATags)
      Load->setAAMetadata(AATags);
    NewPN->addIncoming(Load, Pred);
  }

  DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
  PN.eraseFromParent();
}
1462 | ||||||
1463 | /// Select instructions that use an alloca and are subsequently loaded can be | |||||
1464 | /// rewritten to load both input pointers and then select between the result, | |||||
1465 | /// allowing the load of the alloca to be promoted. | |||||
1466 | /// From this: | |||||
1467 | /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other | |||||
1468 | /// %V = load i32* %P2 | |||||
1469 | /// to: | |||||
1470 | /// %V1 = load i32* %Alloca -> will be mem2reg'd | |||||
1471 | /// %V2 = load i32* %Other | |||||
1472 | /// %V = select i1 %cond, i32 %V1, i32 %V2 | |||||
1473 | /// | |||||
1474 | /// We can do this to a select if its only uses are loads and if the operand | |||||
1475 | /// to the select can be loaded unconditionally. | |||||
1476 | static bool isSafeSelectToSpeculate(SelectInst &SI) { | |||||
1477 | Value *TValue = SI.getTrueValue(); | |||||
1478 | Value *FValue = SI.getFalseValue(); | |||||
1479 | const DataLayout &DL = SI.getModule()->getDataLayout(); | |||||
1480 | bool TDerefable = isDereferenceablePointer(TValue, DL); | |||||
1481 | bool FDerefable = isDereferenceablePointer(FValue, DL); | |||||
1482 | ||||||
1483 | for (User *U : SI.users()) { | |||||
1484 | LoadInst *LI = dyn_cast<LoadInst>(U); | |||||
1485 | if (!LI || !LI->isSimple()) | |||||
1486 | return false; | |||||
1487 | ||||||
1488 | // Both operands to the select need to be dereferencable, either | |||||
1489 | // absolutely (e.g. allocas) or at this point because we can see other | |||||
1490 | // accesses to it. | |||||
1491 | if (!TDerefable && | |||||
1492 | !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment())) | |||||
1493 | return false; | |||||
1494 | if (!FDerefable && | |||||
1495 | !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment())) | |||||
1496 | return false; | |||||
1497 | } | |||||
1498 | ||||||
1499 | return true; | |||||
1500 | } | |||||
1501 | ||||||
// Rewrite "load (select c, p1, p2)" into "select c, (load p1), (load p2)".
// Callers must have validated the transform with isSafeSelectToSpeculate.
static void speculateSelectInstLoads(SelectInst &SI) {
  DEBUG(dbgs() << " original: " << SI << "\n");

  IRBuilderTy IRB(&SI);
  Value *TV = SI.getTrueValue();
  Value *FV = SI.getFalseValue();
  // Replace the loads of the select with a select of two loads.
  while (!SI.use_empty()) {
    LoadInst *LI = cast<LoadInst>(SI.user_back());
    assert(LI->isSimple() && "We only speculate simple loads");

    IRB.SetInsertPoint(LI);
    LoadInst *TL =
        IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
    LoadInst *FL =
        IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
    NumLoadsSpeculated += 2;

    // Transfer alignment and AA info if present.
    TL->setAlignment(LI->getAlignment());
    FL->setAlignment(LI->getAlignment());

    AAMDNodes Tags;
    LI->getAAMetadata(Tags);
    if (Tags) {
      TL->setAAMetadata(Tags);
      FL->setAAMetadata(Tags);
    }

    Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
                                LI->getName() + ".sroa.speculated");

    DEBUG(dbgs() << " speculated to: " << *V << "\n");
    LI->replaceAllUsesWith(V);
    LI->eraseFromParent();
  }
  SI.eraseFromParent();
}
1540 | ||||||
1541 | /// \brief Build a GEP out of a base pointer and indices. | |||||
1542 | /// | |||||
1543 | /// This will return the BasePtr if that is valid, or build a new GEP | |||||
1544 | /// instruction using the IRBuilder if GEP-ing is needed. | |||||
1545 | static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr, | |||||
1546 | SmallVectorImpl<Value *> &Indices, Twine NamePrefix) { | |||||
1547 | if (Indices.empty()) | |||||
1548 | return BasePtr; | |||||
1549 | ||||||
1550 | // A single zero index is a no-op, so check for this and avoid building a GEP | |||||
1551 | // in that case. | |||||
1552 | if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero()) | |||||
1553 | return BasePtr; | |||||
1554 | ||||||
1555 | return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices, | |||||
1556 | NamePrefix + "sroa_idx"); | |||||
1557 | } | |||||
1558 | ||||||
1559 | /// \brief Get a natural GEP off of the BasePtr walking through Ty toward | |||||
1560 | /// TargetTy without changing the offset of the pointer. | |||||
1561 | /// | |||||
1562 | /// This routine assumes we've already established a properly offset GEP with | |||||
1563 | /// Indices, and arrived at the Ty type. The goal is to continue to GEP with | |||||
1564 | /// zero-indices down through type layers until we find one the same as | |||||
1565 | /// TargetTy. If we can't find one with the same type, we at least try to use | |||||
1566 | /// one with the same size. If none of that works, we just produce the GEP as | |||||
1567 | /// indicated by Indices to have the correct offset. | |||||
1568 | static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL, | |||||
1569 | Value *BasePtr, Type *Ty, Type *TargetTy, | |||||
1570 | SmallVectorImpl<Value *> &Indices, | |||||
1571 | Twine NamePrefix) { | |||||
1572 | if (Ty == TargetTy) | |||||
1573 | return buildGEP(IRB, BasePtr, Indices, NamePrefix); | |||||
1574 | ||||||
1575 | // Pointer size to use for the indices. | |||||
1576 | unsigned PtrSize = DL.getPointerTypeSizeInBits(BasePtr->getType()); | |||||
1577 | ||||||
1578 | // See if we can descend into a struct and locate a field with the correct | |||||
1579 | // type. | |||||
1580 | unsigned NumLayers = 0; | |||||
1581 | Type *ElementTy = Ty; | |||||
1582 | do { | |||||
1583 | if (ElementTy->isPointerTy()) | |||||
1584 | break; | |||||
1585 | ||||||
1586 | if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) { | |||||
1587 | ElementTy = ArrayTy->getElementType(); | |||||
1588 | Indices.push_back(IRB.getIntN(PtrSize, 0)); | |||||
1589 | } else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) { | |||||
1590 | ElementTy = VectorTy->getElementType(); | |||||
1591 | Indices.push_back(IRB.getInt32(0)); | |||||
1592 | } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) { | |||||
1593 | if (STy->element_begin() == STy->element_end()) | |||||
1594 | break; // Nothing left to descend into. | |||||
1595 | ElementTy = *STy->element_begin(); | |||||
1596 | Indices.push_back(IRB.getInt32(0)); | |||||
1597 | } else { | |||||
1598 | break; | |||||
1599 | } | |||||
1600 | ++NumLayers; | |||||
1601 | } while (ElementTy != TargetTy); | |||||
1602 | if (ElementTy != TargetTy) | |||||
1603 | Indices.erase(Indices.end() - NumLayers, Indices.end()); | |||||
1604 | ||||||
1605 | return buildGEP(IRB, BasePtr, Indices, NamePrefix); | |||||
1606 | } | |||||
1607 | ||||||
1608 | /// \brief Recursively compute indices for a natural GEP. | |||||
1609 | /// | |||||
1610 | /// This is the recursive step for getNaturalGEPWithOffset that walks down the | |||||
1611 | /// element types adding appropriate indices for the GEP. | |||||
1612 | static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, | |||||
1613 | Value *Ptr, Type *Ty, APInt &Offset, | |||||
1614 | Type *TargetTy, | |||||
1615 | SmallVectorImpl<Value *> &Indices, | |||||
1616 | Twine NamePrefix) { | |||||
1617 | if (Offset == 0) | |||||
1618 | return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices, | |||||
1619 | NamePrefix); | |||||
1620 | ||||||
1621 | // We can't recurse through pointer types. | |||||
1622 | if (Ty->isPointerTy()) | |||||
1623 | return nullptr; | |||||
1624 | ||||||
1625 | // We try to analyze GEPs over vectors here, but note that these GEPs are | |||||
1626 | // extremely poorly defined currently. The long-term goal is to remove GEPing | |||||
1627 | // over a vector from the IR completely. | |||||
1628 | if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) { | |||||
1629 | unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType()); | |||||
1630 | if (ElementSizeInBits % 8 != 0) { | |||||
1631 | // GEPs over non-multiple of 8 size vector elements are invalid. | |||||
1632 | return nullptr; | |||||
1633 | } | |||||
1634 | APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); | |||||
1635 | APInt NumSkippedElements = Offset.sdiv(ElementSize); | |||||
1636 | if (NumSkippedElements.ugt(VecTy->getNumElements())) | |||||
1637 | return nullptr; | |||||
1638 | Offset -= NumSkippedElements * ElementSize; | |||||
1639 | Indices.push_back(IRB.getInt(NumSkippedElements)); | |||||
1640 | return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(), | |||||
1641 | Offset, TargetTy, Indices, NamePrefix); | |||||
1642 | } | |||||
1643 | ||||||
1644 | if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { | |||||
1645 | Type *ElementTy = ArrTy->getElementType(); | |||||
1646 | APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy)); | |||||
1647 | APInt NumSkippedElements = Offset.sdiv(ElementSize); | |||||
1648 | if (NumSkippedElements.ugt(ArrTy->getNumElements())) | |||||
1649 | return nullptr; | |||||
1650 | ||||||
1651 | Offset -= NumSkippedElements * ElementSize; | |||||
1652 | Indices.push_back(IRB.getInt(NumSkippedElements)); | |||||
1653 | return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, | |||||
1654 | Indices, NamePrefix); | |||||
1655 | } | |||||
1656 | ||||||
1657 | StructType *STy = dyn_cast<StructType>(Ty); | |||||
1658 | if (!STy) | |||||
1659 | return nullptr; | |||||
1660 | ||||||
1661 | const StructLayout *SL = DL.getStructLayout(STy); | |||||
1662 | uint64_t StructOffset = Offset.getZExtValue(); | |||||
1663 | if (StructOffset >= SL->getSizeInBytes()) | |||||
1664 | return nullptr; | |||||
1665 | unsigned Index = SL->getElementContainingOffset(StructOffset); | |||||
1666 | Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index)); | |||||
1667 | Type *ElementTy = STy->getElementType(Index); | |||||
1668 | if (Offset.uge(DL.getTypeAllocSize(ElementTy))) | |||||
1669 | return nullptr; // The offset points into alignment padding. | |||||
1670 | ||||||
1671 | Indices.push_back(IRB.getInt32(Index)); | |||||
1672 | return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, | |||||
1673 | Indices, NamePrefix); | |||||
1674 | } | |||||
1675 | ||||||
1676 | /// \brief Get a natural GEP from a base pointer to a particular offset and | |||||
1677 | /// resulting in a particular type. | |||||
1678 | /// | |||||
1679 | /// The goal is to produce a "natural" looking GEP that works with the existing | |||||
1680 | /// composite types to arrive at the appropriate offset and element type for | |||||
1681 | /// a pointer. TargetTy is the element type the returned GEP should point-to if | |||||
1682 | /// possible. We recurse by decreasing Offset, adding the appropriate index to | |||||
1683 | /// Indices, and setting Ty to the result subtype. | |||||
1684 | /// | |||||
1685 | /// If no natural GEP can be constructed, this function returns null. | |||||
1686 | static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, | |||||
1687 | Value *Ptr, APInt Offset, Type *TargetTy, | |||||
1688 | SmallVectorImpl<Value *> &Indices, | |||||
1689 | Twine NamePrefix) { | |||||
1690 | PointerType *Ty = cast<PointerType>(Ptr->getType()); | |||||
1691 | ||||||
1692 | // Don't consider any GEPs through an i8* as natural unless the TargetTy is | |||||
1693 | // an i8. | |||||
1694 | if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8)) | |||||
1695 | return nullptr; | |||||
1696 | ||||||
1697 | Type *ElementTy = Ty->getElementType(); | |||||
1698 | if (!ElementTy->isSized()) | |||||
1699 | return nullptr; // We can't GEP through an unsized element. | |||||
1700 | APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy)); | |||||
1701 | if (ElementSize == 0) | |||||
1702 | return nullptr; // Zero-length arrays can't help us build a natural GEP. | |||||
1703 | APInt NumSkippedElements = Offset.sdiv(ElementSize); | |||||
1704 | ||||||
1705 | Offset -= NumSkippedElements * ElementSize; | |||||
1706 | Indices.push_back(IRB.getInt(NumSkippedElements)); | |||||
1707 | return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, | |||||
1708 | Indices, NamePrefix); | |||||
1709 | } | |||||
1710 | ||||||
1711 | /// \brief Compute an adjusted pointer from Ptr by Offset bytes where the | |||||
1712 | /// resulting pointer has PointerTy. | |||||
1713 | /// | |||||
1714 | /// This tries very hard to compute a "natural" GEP which arrives at the offset | |||||
1715 | /// and produces the pointer type desired. Where it cannot, it will try to use | |||||
1716 | /// the natural GEP to arrive at the offset and bitcast to the type. Where that | |||||
1717 | /// fails, it will try to use an existing i8* and GEP to the byte offset and | |||||
1718 | /// bitcast to the type. | |||||
1719 | /// | |||||
1720 | /// The strategy for finding the more natural GEPs is to peel off layers of the | |||||
1721 | /// pointer, walking back through bit casts and GEPs, searching for a base | |||||
1722 | /// pointer from which we can compute a natural GEP with the desired | |||||
1723 | /// properties. The algorithm tries to fold as many constant indices into | |||||
1724 | /// a single GEP as possible, thus making each GEP more independent of the | |||||
1725 | /// surrounding code. | |||||
1726 | static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, | |||||
1727 | APInt Offset, Type *PointerTy, Twine NamePrefix) { | |||||
1728 | // Even though we don't look through PHI nodes, we could be called on an | |||||
1729 | // instruction in an unreachable block, which may be on a cycle. | |||||
1730 | SmallPtrSet<Value *, 4> Visited; | |||||
1731 | Visited.insert(Ptr); | |||||
1732 | SmallVector<Value *, 4> Indices; | |||||
1733 | ||||||
1734 | // We may end up computing an offset pointer that has the wrong type. If we | |||||
1735 | // never are able to compute one directly that has the correct type, we'll | |||||
1736 | // fall back to it, so keep it and the base it was computed from around here. | |||||
1737 | Value *OffsetPtr = nullptr; | |||||
1738 | Value *OffsetBasePtr; | |||||
1739 | ||||||
1740 | // Remember any i8 pointer we come across to re-use if we need to do a raw | |||||
1741 | // byte offset. | |||||
1742 | Value *Int8Ptr = nullptr; | |||||
1743 | APInt Int8PtrOffset(Offset.getBitWidth(), 0); | |||||
1744 | ||||||
1745 | Type *TargetTy = PointerTy->getPointerElementType(); | |||||
1746 | ||||||
1747 | do { | |||||
1748 | // First fold any existing GEPs into the offset. | |||||
1749 | while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { | |||||
1750 | APInt GEPOffset(Offset.getBitWidth(), 0); | |||||
1751 | if (!GEP->accumulateConstantOffset(DL, GEPOffset)) | |||||
1752 | break; | |||||
1753 | Offset += GEPOffset; | |||||
1754 | Ptr = GEP->getPointerOperand(); | |||||
1755 | if (!Visited.insert(Ptr).second) | |||||
1756 | break; | |||||
1757 | } | |||||
1758 | ||||||
1759 | // See if we can perform a natural GEP here. | |||||
1760 | Indices.clear(); | |||||
1761 | if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy, | |||||
1762 | Indices, NamePrefix)) { | |||||
1763 | // If we have a new natural pointer at the offset, clear out any old | |||||
1764 | // offset pointer we computed. Unless it is the base pointer or | |||||
1765 | // a non-instruction, we built a GEP we don't need. Zap it. | |||||
1766 | if (OffsetPtr && OffsetPtr != OffsetBasePtr) | |||||
1767 | if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) { | |||||
1768 | assert(I->use_empty() && "Built a GEP with uses some how!")((I->use_empty() && "Built a GEP with uses some how!" ) ? static_cast<void> (0) : __assert_fail ("I->use_empty() && \"Built a GEP with uses some how!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 1768, __PRETTY_FUNCTION__)); | |||||
1769 | I->eraseFromParent(); | |||||
1770 | } | |||||
1771 | OffsetPtr = P; | |||||
1772 | OffsetBasePtr = Ptr; | |||||
1773 | // If we also found a pointer of the right type, we're done. | |||||
1774 | if (P->getType() == PointerTy) | |||||
1775 | return P; | |||||
1776 | } | |||||
1777 | ||||||
1778 | // Stash this pointer if we've found an i8*. | |||||
1779 | if (Ptr->getType()->isIntegerTy(8)) { | |||||
1780 | Int8Ptr = Ptr; | |||||
1781 | Int8PtrOffset = Offset; | |||||
1782 | } | |||||
1783 | ||||||
1784 | // Peel off a layer of the pointer and update the offset appropriately. | |||||
1785 | if (Operator::getOpcode(Ptr) == Instruction::BitCast) { | |||||
1786 | Ptr = cast<Operator>(Ptr)->getOperand(0); | |||||
1787 | } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) { | |||||
1788 | if (GA->mayBeOverridden()) | |||||
1789 | break; | |||||
1790 | Ptr = GA->getAliasee(); | |||||
1791 | } else { | |||||
1792 | break; | |||||
1793 | } | |||||
1794 | assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!")((Ptr->getType()->isPointerTy() && "Unexpected operand type!" ) ? static_cast<void> (0) : __assert_fail ("Ptr->getType()->isPointerTy() && \"Unexpected operand type!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 1794, __PRETTY_FUNCTION__)); | |||||
1795 | } while (Visited.insert(Ptr).second); | |||||
1796 | ||||||
1797 | if (!OffsetPtr) { | |||||
1798 | if (!Int8Ptr) { | |||||
1799 | Int8Ptr = IRB.CreateBitCast( | |||||
1800 | Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()), | |||||
1801 | NamePrefix + "sroa_raw_cast"); | |||||
1802 | Int8PtrOffset = Offset; | |||||
1803 | } | |||||
1804 | ||||||
1805 | OffsetPtr = Int8PtrOffset == 0 | |||||
1806 | ? Int8Ptr | |||||
1807 | : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr, | |||||
1808 | IRB.getInt(Int8PtrOffset), | |||||
1809 | NamePrefix + "sroa_raw_idx"); | |||||
1810 | } | |||||
1811 | Ptr = OffsetPtr; | |||||
1812 | ||||||
1813 | // On the off chance we were targeting i8*, guard the bitcast here. | |||||
1814 | if (Ptr->getType() != PointerTy) | |||||
1815 | Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast"); | |||||
1816 | ||||||
1817 | return Ptr; | |||||
1818 | } | |||||
1819 | ||||||
1820 | /// \brief Compute the adjusted alignment for a load or store from an offset. | |||||
1821 | static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset, | |||||
1822 | const DataLayout &DL) { | |||||
1823 | unsigned Alignment; | |||||
1824 | Type *Ty; | |||||
1825 | if (auto *LI = dyn_cast<LoadInst>(I)) { | |||||
1826 | Alignment = LI->getAlignment(); | |||||
1827 | Ty = LI->getType(); | |||||
1828 | } else if (auto *SI = dyn_cast<StoreInst>(I)) { | |||||
1829 | Alignment = SI->getAlignment(); | |||||
1830 | Ty = SI->getValueOperand()->getType(); | |||||
1831 | } else { | |||||
1832 | llvm_unreachable("Only loads and stores are allowed!")::llvm::llvm_unreachable_internal("Only loads and stores are allowed!" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 1832); | |||||
1833 | } | |||||
1834 | ||||||
1835 | if (!Alignment) | |||||
1836 | Alignment = DL.getABITypeAlignment(Ty); | |||||
1837 | ||||||
1838 | return MinAlign(Alignment, Offset); | |||||
1839 | } | |||||
1840 | ||||||
1841 | /// \brief Test whether we can convert a value from the old to the new type. | |||||
1842 | /// | |||||
1843 | /// This predicate should be used to guard calls to convertValue in order to | |||||
1844 | /// ensure that we only try to convert viable values. The strategy is that we | |||||
1845 | /// will peel off single element struct and array wrappings to get to an | |||||
1846 | /// underlying value, and convert that value. | |||||
1847 | static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { | |||||
1848 | if (OldTy == NewTy) | |||||
1849 | return true; | |||||
1850 | if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy)) | |||||
1851 | if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy)) | |||||
1852 | if (NewITy->getBitWidth() >= OldITy->getBitWidth()) | |||||
1853 | return true; | |||||
1854 | if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy)) | |||||
1855 | return false; | |||||
1856 | if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) | |||||
1857 | return false; | |||||
1858 | ||||||
1859 | // We can convert pointers to integers and vice-versa. Same for vectors | |||||
1860 | // of pointers and integers. | |||||
1861 | OldTy = OldTy->getScalarType(); | |||||
1862 | NewTy = NewTy->getScalarType(); | |||||
1863 | if (NewTy->isPointerTy() || OldTy->isPointerTy()) { | |||||
1864 | if (NewTy->isPointerTy() && OldTy->isPointerTy()) | |||||
1865 | return true; | |||||
1866 | if (NewTy->isIntegerTy() || OldTy->isIntegerTy()) | |||||
1867 | return true; | |||||
1868 | return false; | |||||
1869 | } | |||||
1870 | ||||||
1871 | return true; | |||||
1872 | } | |||||
1873 | ||||||
1874 | /// \brief Generic routine to convert an SSA value to a value of a different | |||||
1875 | /// type. | |||||
1876 | /// | |||||
1877 | /// This will try various different casting techniques, such as bitcasts, | |||||
1878 | /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test | |||||
1879 | /// two types for viability with this routine. | |||||
1880 | static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, | |||||
1881 | Type *NewTy) { | |||||
1882 | Type *OldTy = V->getType(); | |||||
1883 | assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type")((canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type" ) ? static_cast<void> (0) : __assert_fail ("canConvertValue(DL, OldTy, NewTy) && \"Value not convertable to type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 1883, __PRETTY_FUNCTION__)); | |||||
1884 | ||||||
1885 | if (OldTy == NewTy) | |||||
1886 | return V; | |||||
1887 | ||||||
1888 | if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy)) | |||||
1889 | if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy)) | |||||
1890 | if (NewITy->getBitWidth() > OldITy->getBitWidth()) | |||||
1891 | return IRB.CreateZExt(V, NewITy); | |||||
1892 | ||||||
1893 | // See if we need inttoptr for this type pair. A cast involving both scalars | |||||
1894 | // and vectors requires and additional bitcast. | |||||
1895 | if (OldTy->getScalarType()->isIntegerTy() && | |||||
1896 | NewTy->getScalarType()->isPointerTy()) { | |||||
1897 | // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8* | |||||
1898 | if (OldTy->isVectorTy() && !NewTy->isVectorTy()) | |||||
1899 | return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)), | |||||
1900 | NewTy); | |||||
1901 | ||||||
1902 | // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*> | |||||
1903 | if (!OldTy->isVectorTy() && NewTy->isVectorTy()) | |||||
1904 | return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)), | |||||
1905 | NewTy); | |||||
1906 | ||||||
1907 | return IRB.CreateIntToPtr(V, NewTy); | |||||
1908 | } | |||||
1909 | ||||||
1910 | // See if we need ptrtoint for this type pair. A cast involving both scalars | |||||
1911 | // and vectors requires and additional bitcast. | |||||
1912 | if (OldTy->getScalarType()->isPointerTy() && | |||||
1913 | NewTy->getScalarType()->isIntegerTy()) { | |||||
1914 | // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128 | |||||
1915 | if (OldTy->isVectorTy() && !NewTy->isVectorTy()) | |||||
1916 | return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)), | |||||
1917 | NewTy); | |||||
1918 | ||||||
1919 | // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32> | |||||
1920 | if (!OldTy->isVectorTy() && NewTy->isVectorTy()) | |||||
1921 | return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)), | |||||
1922 | NewTy); | |||||
1923 | ||||||
1924 | return IRB.CreatePtrToInt(V, NewTy); | |||||
1925 | } | |||||
1926 | ||||||
1927 | return IRB.CreateBitCast(V, NewTy); | |||||
1928 | } | |||||
1929 | ||||||
1930 | /// \brief Test whether the given slice use can be promoted to a vector. | |||||
1931 | /// | |||||
1932 | /// This function is called to test each entry in a partioning which is slated | |||||
1933 | /// for a single slice. | |||||
static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P,
                                            const Slice &S, VectorType *Ty,
                                            uint64_t ElementSize,
                                            const DataLayout &DL) {
  // First validate the slice offsets.
  // Clamp the slice to the partition and rebase to partition-relative byte
  // offsets; both ends must land exactly on element boundaries of Ty.
  uint64_t BeginOffset =
      std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
  uint64_t BeginIndex = BeginOffset / ElementSize;
  if (BeginIndex * ElementSize != BeginOffset ||
      BeginIndex >= Ty->getNumElements())
    return false;
  uint64_t EndOffset =
      std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
  uint64_t EndIndex = EndOffset / ElementSize;
  if (EndIndex * ElementSize != EndOffset || EndIndex > Ty->getNumElements())
    return false;

  assert(EndIndex > BeginIndex && "Empty vector!")((EndIndex > BeginIndex && "Empty vector!") ? static_cast <void> (0) : __assert_fail ("EndIndex > BeginIndex && \"Empty vector!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 1951, __PRETTY_FUNCTION__));
  uint64_t NumElements = EndIndex - BeginIndex;
  // The type this slice would access if rewritten against the vector: a
  // single element, or a subvector of the covered elements.
  Type *SliceTy = (NumElements == 1)
                      ? Ty->getElementType()
                      : VectorType::get(Ty->getElementType(), NumElements);

  // Integer type used when a load/store extends beyond the partition and
  // will be split down to the partition's extent.
  Type *SplitIntTy =
      Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);

  Use *U = S.getUse();

  // Dispatch on the kind of user this slice represents.
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
    if (MI->isVolatile())
      return false;
    if (!S.isSplittable())
      return false; // Skip any unsplittable intrinsics.
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
    // Lifetime markers are harmless; any other intrinsic blocks promotion.
    if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
        II->getIntrinsicID() != Intrinsic::lifetime_end)
      return false;
  } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
    // Disable vector promotion when there are loads or stores of an FCA.
    return false;
  } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
    if (LI->isVolatile())
      return false;
    Type *LTy = LI->getType();
    // A load straddling the partition will be split to the partition extent,
    // so check convertibility against the split integer type instead.
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
      assert(LTy->isIntegerTy())((LTy->isIntegerTy()) ? static_cast<void> (0) : __assert_fail ("LTy->isIntegerTy()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 1979, __PRETTY_FUNCTION__));
      LTy = SplitIntTy;
    }
    if (!canConvertValue(DL, SliceTy, LTy))
      return false;
  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
    if (SI->isVolatile())
      return false;
    Type *STy = SI->getValueOperand()->getType();
    // Same straddling logic as for loads, but converting toward the slice.
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
      assert(STy->isIntegerTy())((STy->isIntegerTy()) ? static_cast<void> (0) : __assert_fail ("STy->isIntegerTy()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 1989, __PRETTY_FUNCTION__));
      STy = SplitIntTy;
    }
    if (!canConvertValue(DL, STy, SliceTy))
      return false;
  } else {
    // Any other kind of user defeats vector promotion.
    return false;
  }

  return true;
}
2000 | ||||||
2001 | /// \brief Test whether the given alloca partitioning and range of slices can be | |||||
2002 | /// promoted to a vector. | |||||
2003 | /// | |||||
2004 | /// This is a quick test to check whether we can rewrite a particular alloca | |||||
2005 | /// partition (and its newly formed alloca) into a vector alloca with only | |||||
2006 | /// whole-vector loads and stores such that it could be promoted to a vector | |||||
2007 | /// SSA value. We only can ensure this for a limited set of operations, and we | |||||
2008 | /// don't want to do the rewrites unless we are confident that the result will | |||||
2009 | /// be promotable, so we have an early test here. | |||||
2010 | static VectorType *isVectorPromotionViable(AllocaSlices::Partition &P, | |||||
2011 | const DataLayout &DL) { | |||||
2012 | // Collect the candidate types for vector-based promotion. Also track whether | |||||
2013 | // we have different element types. | |||||
2014 | SmallVector<VectorType *, 4> CandidateTys; | |||||
2015 | Type *CommonEltTy = nullptr; | |||||
2016 | bool HaveCommonEltTy = true; | |||||
2017 | auto CheckCandidateType = [&](Type *Ty) { | |||||
2018 | if (auto *VTy = dyn_cast<VectorType>(Ty)) { | |||||
2019 | CandidateTys.push_back(VTy); | |||||
2020 | if (!CommonEltTy) | |||||
2021 | CommonEltTy = VTy->getElementType(); | |||||
2022 | else if (CommonEltTy != VTy->getElementType()) | |||||
2023 | HaveCommonEltTy = false; | |||||
2024 | } | |||||
2025 | }; | |||||
2026 | // Consider any loads or stores that are the exact size of the slice. | |||||
2027 | for (const Slice &S : P) | |||||
2028 | if (S.beginOffset() == P.beginOffset() && | |||||
2029 | S.endOffset() == P.endOffset()) { | |||||
2030 | if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser())) | |||||
2031 | CheckCandidateType(LI->getType()); | |||||
2032 | else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser())) | |||||
2033 | CheckCandidateType(SI->getValueOperand()->getType()); | |||||
2034 | } | |||||
2035 | ||||||
2036 | // If we didn't find a vector type, nothing to do here. | |||||
2037 | if (CandidateTys.empty()) | |||||
2038 | return nullptr; | |||||
2039 | ||||||
2040 | // Remove non-integer vector types if we had multiple common element types. | |||||
2041 | // FIXME: It'd be nice to replace them with integer vector types, but we can't | |||||
2042 | // do that until all the backends are known to produce good code for all | |||||
2043 | // integer vector types. | |||||
2044 | if (!HaveCommonEltTy) { | |||||
2045 | CandidateTys.erase(std::remove_if(CandidateTys.begin(), CandidateTys.end(), | |||||
2046 | [](VectorType *VTy) { | |||||
2047 | return !VTy->getElementType()->isIntegerTy(); | |||||
2048 | }), | |||||
2049 | CandidateTys.end()); | |||||
2050 | ||||||
2051 | // If there were no integer vector types, give up. | |||||
2052 | if (CandidateTys.empty()) | |||||
2053 | return nullptr; | |||||
2054 | ||||||
2055 | // Rank the remaining candidate vector types. This is easy because we know | |||||
2056 | // they're all integer vectors. We sort by ascending number of elements. | |||||
2057 | auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) { | |||||
2058 | assert(DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) &&((DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) && "Cannot have vector types of different sizes!") ? static_cast <void> (0) : __assert_fail ("DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) && \"Cannot have vector types of different sizes!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2059, __PRETTY_FUNCTION__)) | |||||
2059 | "Cannot have vector types of different sizes!")((DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) && "Cannot have vector types of different sizes!") ? static_cast <void> (0) : __assert_fail ("DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) && \"Cannot have vector types of different sizes!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2059, __PRETTY_FUNCTION__)); | |||||
2060 | assert(RHSTy->getElementType()->isIntegerTy() &&((RHSTy->getElementType()->isIntegerTy() && "All non-integer types eliminated!" ) ? static_cast<void> (0) : __assert_fail ("RHSTy->getElementType()->isIntegerTy() && \"All non-integer types eliminated!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2061, __PRETTY_FUNCTION__)) | |||||
2061 | "All non-integer types eliminated!")((RHSTy->getElementType()->isIntegerTy() && "All non-integer types eliminated!" ) ? static_cast<void> (0) : __assert_fail ("RHSTy->getElementType()->isIntegerTy() && \"All non-integer types eliminated!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2061, __PRETTY_FUNCTION__)); | |||||
2062 | assert(LHSTy->getElementType()->isIntegerTy() &&((LHSTy->getElementType()->isIntegerTy() && "All non-integer types eliminated!" ) ? static_cast<void> (0) : __assert_fail ("LHSTy->getElementType()->isIntegerTy() && \"All non-integer types eliminated!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2063, __PRETTY_FUNCTION__)) | |||||
2063 | "All non-integer types eliminated!")((LHSTy->getElementType()->isIntegerTy() && "All non-integer types eliminated!" ) ? static_cast<void> (0) : __assert_fail ("LHSTy->getElementType()->isIntegerTy() && \"All non-integer types eliminated!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2063, __PRETTY_FUNCTION__)); | |||||
2064 | return RHSTy->getNumElements() < LHSTy->getNumElements(); | |||||
2065 | }; | |||||
2066 | std::sort(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes); | |||||
2067 | CandidateTys.erase( | |||||
2068 | std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes), | |||||
2069 | CandidateTys.end()); | |||||
2070 | } else { | |||||
2071 | // The only way to have the same element type in every vector type is to | |||||
2072 | // have the same vector type. Check that and remove all but one. | |||||
2073 | #ifndef NDEBUG | |||||
2074 | for (VectorType *VTy : CandidateTys) { | |||||
2075 | assert(VTy->getElementType() == CommonEltTy &&((VTy->getElementType() == CommonEltTy && "Unaccounted for element type!" ) ? static_cast<void> (0) : __assert_fail ("VTy->getElementType() == CommonEltTy && \"Unaccounted for element type!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2076, __PRETTY_FUNCTION__)) | |||||
2076 | "Unaccounted for element type!")((VTy->getElementType() == CommonEltTy && "Unaccounted for element type!" ) ? static_cast<void> (0) : __assert_fail ("VTy->getElementType() == CommonEltTy && \"Unaccounted for element type!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2076, __PRETTY_FUNCTION__)); | |||||
2077 | assert(VTy == CandidateTys[0] &&((VTy == CandidateTys[0] && "Different vector types with the same element type!" ) ? static_cast<void> (0) : __assert_fail ("VTy == CandidateTys[0] && \"Different vector types with the same element type!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2078, __PRETTY_FUNCTION__)) | |||||
2078 | "Different vector types with the same element type!")((VTy == CandidateTys[0] && "Different vector types with the same element type!" ) ? static_cast<void> (0) : __assert_fail ("VTy == CandidateTys[0] && \"Different vector types with the same element type!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2078, __PRETTY_FUNCTION__)); | |||||
2079 | } | |||||
2080 | #endif | |||||
2081 | CandidateTys.resize(1); | |||||
2082 | } | |||||
2083 | ||||||
2084 | // Try each vector type, and return the one which works. | |||||
2085 | auto CheckVectorTypeForPromotion = [&](VectorType *VTy) { | |||||
2086 | uint64_t ElementSize = DL.getTypeSizeInBits(VTy->getElementType()); | |||||
2087 | ||||||
2088 | // While the definition of LLVM vectors is bitpacked, we don't support sizes | |||||
2089 | // that aren't byte sized. | |||||
2090 | if (ElementSize % 8) | |||||
2091 | return false; | |||||
2092 | assert((DL.getTypeSizeInBits(VTy) % 8) == 0 &&(((DL.getTypeSizeInBits(VTy) % 8) == 0 && "vector size not a multiple of element size?" ) ? static_cast<void> (0) : __assert_fail ("(DL.getTypeSizeInBits(VTy) % 8) == 0 && \"vector size not a multiple of element size?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2093, __PRETTY_FUNCTION__)) | |||||
2093 | "vector size not a multiple of element size?")(((DL.getTypeSizeInBits(VTy) % 8) == 0 && "vector size not a multiple of element size?" ) ? static_cast<void> (0) : __assert_fail ("(DL.getTypeSizeInBits(VTy) % 8) == 0 && \"vector size not a multiple of element size?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2093, __PRETTY_FUNCTION__)); | |||||
2094 | ElementSize /= 8; | |||||
2095 | ||||||
2096 | for (const Slice &S : P) | |||||
2097 | if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL)) | |||||
2098 | return false; | |||||
2099 | ||||||
2100 | for (const Slice *S : P.splitSliceTails()) | |||||
2101 | if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL)) | |||||
2102 | return false; | |||||
2103 | ||||||
2104 | return true; | |||||
2105 | }; | |||||
2106 | for (VectorType *VTy : CandidateTys) | |||||
2107 | if (CheckVectorTypeForPromotion(VTy)) | |||||
2108 | return VTy; | |||||
2109 | ||||||
2110 | return nullptr; | |||||
2111 | } | |||||
2112 | ||||||
2113 | /// \brief Test whether a slice of an alloca is valid for integer widening. | |||||
2114 | /// | |||||
2115 | /// This implements the necessary checking for the \c isIntegerWideningViable | |||||
2116 | /// test below on a single slice of the alloca. | |||||
2117 | static bool isIntegerWideningViableForSlice(const Slice &S, | |||||
2118 | uint64_t AllocBeginOffset, | |||||
2119 | Type *AllocaTy, | |||||
2120 | const DataLayout &DL, | |||||
2121 | bool &WholeAllocaOp) { | |||||
2122 | uint64_t Size = DL.getTypeStoreSize(AllocaTy); | |||||
2123 | ||||||
2124 | uint64_t RelBegin = S.beginOffset() - AllocBeginOffset; | |||||
2125 | uint64_t RelEnd = S.endOffset() - AllocBeginOffset; | |||||
2126 | ||||||
2127 | // We can't reasonably handle cases where the load or store extends past | |||||
2128 | // the end of the aloca's type and into its padding. | |||||
2129 | if (RelEnd > Size) | |||||
2130 | return false; | |||||
2131 | ||||||
2132 | Use *U = S.getUse(); | |||||
2133 | ||||||
2134 | if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { | |||||
2135 | if (LI->isVolatile()) | |||||
2136 | return false; | |||||
2137 | // Note that we don't count vector loads or stores as whole-alloca | |||||
2138 | // operations which enable integer widening because we would prefer to use | |||||
2139 | // vector widening instead. | |||||
2140 | if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size) | |||||
2141 | WholeAllocaOp = true; | |||||
2142 | if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) { | |||||
2143 | if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy)) | |||||
2144 | return false; | |||||
2145 | } else if (RelBegin != 0 || RelEnd != Size || | |||||
2146 | !canConvertValue(DL, AllocaTy, LI->getType())) { | |||||
2147 | // Non-integer loads need to be convertible from the alloca type so that | |||||
2148 | // they are promotable. | |||||
2149 | return false; | |||||
2150 | } | |||||
2151 | } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { | |||||
2152 | Type *ValueTy = SI->getValueOperand()->getType(); | |||||
2153 | if (SI->isVolatile()) | |||||
2154 | return false; | |||||
2155 | // Note that we don't count vector loads or stores as whole-alloca | |||||
2156 | // operations which enable integer widening because we would prefer to use | |||||
2157 | // vector widening instead. | |||||
2158 | if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size) | |||||
2159 | WholeAllocaOp = true; | |||||
2160 | if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) { | |||||
2161 | if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy)) | |||||
2162 | return false; | |||||
2163 | } else if (RelBegin != 0 || RelEnd != Size || | |||||
2164 | !canConvertValue(DL, ValueTy, AllocaTy)) { | |||||
2165 | // Non-integer stores need to be convertible to the alloca type so that | |||||
2166 | // they are promotable. | |||||
2167 | return false; | |||||
2168 | } | |||||
2169 | } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { | |||||
2170 | if (MI->isVolatile() || !isa<Constant>(MI->getLength())) | |||||
2171 | return false; | |||||
2172 | if (!S.isSplittable()) | |||||
2173 | return false; // Skip any unsplittable intrinsics. | |||||
2174 | } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { | |||||
2175 | if (II->getIntrinsicID() != Intrinsic::lifetime_start && | |||||
2176 | II->getIntrinsicID() != Intrinsic::lifetime_end) | |||||
2177 | return false; | |||||
2178 | } else { | |||||
2179 | return false; | |||||
2180 | } | |||||
2181 | ||||||
2182 | return true; | |||||
2183 | } | |||||
2184 | ||||||
2185 | /// \brief Test whether the given alloca partition's integer operations can be | |||||
2186 | /// widened to promotable ones. | |||||
2187 | /// | |||||
2188 | /// This is a quick test to check whether we can rewrite the integer loads and | |||||
2189 | /// stores to a particular alloca into wider loads and stores and be able to | |||||
2190 | /// promote the resulting alloca. | |||||
2191 | static bool isIntegerWideningViable(AllocaSlices::Partition &P, Type *AllocaTy, | |||||
2192 | const DataLayout &DL) { | |||||
2193 | uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy); | |||||
2194 | // Don't create integer types larger than the maximum bitwidth. | |||||
2195 | if (SizeInBits > IntegerType::MAX_INT_BITS) | |||||
2196 | return false; | |||||
2197 | ||||||
2198 | // Don't try to handle allocas with bit-padding. | |||||
2199 | if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy)) | |||||
2200 | return false; | |||||
2201 | ||||||
2202 | // We need to ensure that an integer type with the appropriate bitwidth can | |||||
2203 | // be converted to the alloca type, whatever that is. We don't want to force | |||||
2204 | // the alloca itself to have an integer type if there is a more suitable one. | |||||
2205 | Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits); | |||||
2206 | if (!canConvertValue(DL, AllocaTy, IntTy) || | |||||
2207 | !canConvertValue(DL, IntTy, AllocaTy)) | |||||
2208 | return false; | |||||
2209 | ||||||
2210 | // While examining uses, we ensure that the alloca has a covering load or | |||||
2211 | // store. We don't want to widen the integer operations only to fail to | |||||
2212 | // promote due to some other unsplittable entry (which we may make splittable | |||||
2213 | // later). However, if there are only splittable uses, go ahead and assume | |||||
2214 | // that we cover the alloca. | |||||
2215 | // FIXME: We shouldn't consider split slices that happen to start in the | |||||
2216 | // partition here... | |||||
2217 | bool WholeAllocaOp = | |||||
2218 | P.begin() != P.end() ? false : DL.isLegalInteger(SizeInBits); | |||||
2219 | ||||||
2220 | for (const Slice &S : P) | |||||
2221 | if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL, | |||||
2222 | WholeAllocaOp)) | |||||
2223 | return false; | |||||
2224 | ||||||
2225 | for (const Slice *S : P.splitSliceTails()) | |||||
2226 | if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL, | |||||
2227 | WholeAllocaOp)) | |||||
2228 | return false; | |||||
2229 | ||||||
2230 | return WholeAllocaOp; | |||||
2231 | } | |||||
2232 | ||||||
2233 | static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, | |||||
2234 | IntegerType *Ty, uint64_t Offset, | |||||
2235 | const Twine &Name) { | |||||
2236 | DEBUG(dbgs() << " start: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " start: " << *V << "\n"; } } while (0); | |||||
2237 | IntegerType *IntTy = cast<IntegerType>(V->getType()); | |||||
2238 | assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&((DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize( IntTy) && "Element extends past full value") ? static_cast <void> (0) : __assert_fail ("DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && \"Element extends past full value\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2239, __PRETTY_FUNCTION__)) | |||||
2239 | "Element extends past full value")((DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize( IntTy) && "Element extends past full value") ? static_cast <void> (0) : __assert_fail ("DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && \"Element extends past full value\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2239, __PRETTY_FUNCTION__)); | |||||
2240 | uint64_t ShAmt = 8 * Offset; | |||||
2241 | if (DL.isBigEndian()) | |||||
2242 | ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); | |||||
2243 | if (ShAmt) { | |||||
2244 | V = IRB.CreateLShr(V, ShAmt, Name + ".shift"); | |||||
2245 | DEBUG(dbgs() << " shifted: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " shifted: " << *V << "\n"; } } while (0); | |||||
2246 | } | |||||
2247 | assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&((Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot extract to a larger integer!") ? static_cast<void > (0) : __assert_fail ("Ty->getBitWidth() <= IntTy->getBitWidth() && \"Cannot extract to a larger integer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2248, __PRETTY_FUNCTION__)) | |||||
2248 | "Cannot extract to a larger integer!")((Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot extract to a larger integer!") ? static_cast<void > (0) : __assert_fail ("Ty->getBitWidth() <= IntTy->getBitWidth() && \"Cannot extract to a larger integer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2248, __PRETTY_FUNCTION__)); | |||||
2249 | if (Ty != IntTy) { | |||||
2250 | V = IRB.CreateTrunc(V, Ty, Name + ".trunc"); | |||||
2251 | DEBUG(dbgs() << " trunced: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " trunced: " << *V << "\n"; } } while (0); | |||||
2252 | } | |||||
2253 | return V; | |||||
2254 | } | |||||
2255 | ||||||
2256 | static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, | |||||
2257 | Value *V, uint64_t Offset, const Twine &Name) { | |||||
2258 | IntegerType *IntTy = cast<IntegerType>(Old->getType()); | |||||
2259 | IntegerType *Ty = cast<IntegerType>(V->getType()); | |||||
2260 | assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&((Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot insert a larger integer!") ? static_cast<void> (0) : __assert_fail ("Ty->getBitWidth() <= IntTy->getBitWidth() && \"Cannot insert a larger integer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2261, __PRETTY_FUNCTION__)) | |||||
2261 | "Cannot insert a larger integer!")((Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot insert a larger integer!") ? static_cast<void> (0) : __assert_fail ("Ty->getBitWidth() <= IntTy->getBitWidth() && \"Cannot insert a larger integer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2261, __PRETTY_FUNCTION__)); | |||||
2262 | DEBUG(dbgs() << " start: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " start: " << *V << "\n"; } } while (0); | |||||
2263 | if (Ty != IntTy) { | |||||
2264 | V = IRB.CreateZExt(V, IntTy, Name + ".ext"); | |||||
2265 | DEBUG(dbgs() << " extended: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " extended: " << *V << "\n"; } } while (0); | |||||
2266 | } | |||||
2267 | assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&((DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize( IntTy) && "Element store outside of alloca store") ? static_cast <void> (0) : __assert_fail ("DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && \"Element store outside of alloca store\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2268, __PRETTY_FUNCTION__)) | |||||
2268 | "Element store outside of alloca store")((DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize( IntTy) && "Element store outside of alloca store") ? static_cast <void> (0) : __assert_fail ("DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && \"Element store outside of alloca store\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2268, __PRETTY_FUNCTION__)); | |||||
2269 | uint64_t ShAmt = 8 * Offset; | |||||
2270 | if (DL.isBigEndian()) | |||||
2271 | ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); | |||||
2272 | if (ShAmt) { | |||||
2273 | V = IRB.CreateShl(V, ShAmt, Name + ".shift"); | |||||
2274 | DEBUG(dbgs() << " shifted: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " shifted: " << *V << "\n"; } } while (0); | |||||
2275 | } | |||||
2276 | ||||||
2277 | if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) { | |||||
2278 | APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt); | |||||
2279 | Old = IRB.CreateAnd(Old, Mask, Name + ".mask"); | |||||
2280 | DEBUG(dbgs() << " masked: " << *Old << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " masked: " << *Old << "\n"; } } while (0); | |||||
2281 | V = IRB.CreateOr(Old, V, Name + ".insert"); | |||||
2282 | DEBUG(dbgs() << " inserted: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " inserted: " << *V << "\n"; } } while (0); | |||||
2283 | } | |||||
2284 | return V; | |||||
2285 | } | |||||
2286 | ||||||
2287 | static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, | |||||
2288 | unsigned EndIndex, const Twine &Name) { | |||||
2289 | VectorType *VecTy = cast<VectorType>(V->getType()); | |||||
2290 | unsigned NumElements = EndIndex - BeginIndex; | |||||
2291 | assert(NumElements <= VecTy->getNumElements() && "Too many elements!")((NumElements <= VecTy->getNumElements() && "Too many elements!" ) ? static_cast<void> (0) : __assert_fail ("NumElements <= VecTy->getNumElements() && \"Too many elements!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2291, __PRETTY_FUNCTION__)); | |||||
2292 | ||||||
2293 | if (NumElements == VecTy->getNumElements()) | |||||
2294 | return V; | |||||
2295 | ||||||
2296 | if (NumElements == 1) { | |||||
2297 | V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), | |||||
2298 | Name + ".extract"); | |||||
2299 | DEBUG(dbgs() << " extract: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " extract: " << *V << "\n"; } } while (0); | |||||
2300 | return V; | |||||
2301 | } | |||||
2302 | ||||||
2303 | SmallVector<Constant *, 8> Mask; | |||||
2304 | Mask.reserve(NumElements); | |||||
2305 | for (unsigned i = BeginIndex; i != EndIndex; ++i) | |||||
2306 | Mask.push_back(IRB.getInt32(i)); | |||||
2307 | V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), | |||||
2308 | ConstantVector::get(Mask), Name + ".extract"); | |||||
2309 | DEBUG(dbgs() << " shuffle: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " shuffle: " << *V << "\n"; } } while (0); | |||||
2310 | return V; | |||||
2311 | } | |||||
2312 | ||||||
2313 | static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, | |||||
2314 | unsigned BeginIndex, const Twine &Name) { | |||||
2315 | VectorType *VecTy = cast<VectorType>(Old->getType()); | |||||
2316 | assert(VecTy && "Can only insert a vector into a vector")((VecTy && "Can only insert a vector into a vector") ? static_cast<void> (0) : __assert_fail ("VecTy && \"Can only insert a vector into a vector\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2316, __PRETTY_FUNCTION__)); | |||||
2317 | ||||||
2318 | VectorType *Ty = dyn_cast<VectorType>(V->getType()); | |||||
2319 | if (!Ty) { | |||||
2320 | // Single element to insert. | |||||
2321 | V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex), | |||||
2322 | Name + ".insert"); | |||||
2323 | DEBUG(dbgs() << " insert: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " insert: " << *V << "\n"; } } while (0); | |||||
2324 | return V; | |||||
2325 | } | |||||
2326 | ||||||
2327 | assert(Ty->getNumElements() <= VecTy->getNumElements() &&((Ty->getNumElements() <= VecTy->getNumElements() && "Too many elements!") ? static_cast<void> (0) : __assert_fail ("Ty->getNumElements() <= VecTy->getNumElements() && \"Too many elements!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2328, __PRETTY_FUNCTION__)) | |||||
2328 | "Too many elements!")((Ty->getNumElements() <= VecTy->getNumElements() && "Too many elements!") ? static_cast<void> (0) : __assert_fail ("Ty->getNumElements() <= VecTy->getNumElements() && \"Too many elements!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2328, __PRETTY_FUNCTION__)); | |||||
2329 | if (Ty->getNumElements() == VecTy->getNumElements()) { | |||||
2330 | assert(V->getType() == VecTy && "Vector type mismatch")((V->getType() == VecTy && "Vector type mismatch") ? static_cast<void> (0) : __assert_fail ("V->getType() == VecTy && \"Vector type mismatch\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2330, __PRETTY_FUNCTION__)); | |||||
2331 | return V; | |||||
2332 | } | |||||
2333 | unsigned EndIndex = BeginIndex + Ty->getNumElements(); | |||||
2334 | ||||||
2335 | // When inserting a smaller vector into the larger to store, we first | |||||
2336 | // use a shuffle vector to widen it with undef elements, and then | |||||
2337 | // a second shuffle vector to select between the loaded vector and the | |||||
2338 | // incoming vector. | |||||
2339 | SmallVector<Constant *, 8> Mask; | |||||
2340 | Mask.reserve(VecTy->getNumElements()); | |||||
2341 | for (unsigned i = 0; i != VecTy->getNumElements(); ++i) | |||||
2342 | if (i >= BeginIndex && i < EndIndex) | |||||
2343 | Mask.push_back(IRB.getInt32(i - BeginIndex)); | |||||
2344 | else | |||||
2345 | Mask.push_back(UndefValue::get(IRB.getInt32Ty())); | |||||
2346 | V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), | |||||
2347 | ConstantVector::get(Mask), Name + ".expand"); | |||||
2348 | DEBUG(dbgs() << " shuffle: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " shuffle: " << *V << "\n"; } } while (0); | |||||
2349 | ||||||
2350 | Mask.clear(); | |||||
2351 | for (unsigned i = 0; i != VecTy->getNumElements(); ++i) | |||||
2352 | Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); | |||||
2353 | ||||||
2354 | V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend"); | |||||
2355 | ||||||
2356 | DEBUG(dbgs() << " blend: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " blend: " << *V << "\n"; } } while (0); | |||||
2357 | return V; | |||||
2358 | } | |||||
2359 | ||||||
2360 | namespace { | |||||
/// \brief Visitor to rewrite instructions using a particular slice of an alloca
2362 | /// to use a new alloca. | |||||
2363 | /// | |||||
2364 | /// Also implements the rewriting to vector-based accesses when the partition | |||||
2365 | /// passes the isVectorPromotionViable predicate. Most of the rewriting logic | |||||
2366 | /// lives here. | |||||
2367 | class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { | |||||
2368 | // Befriend the base class so it can delegate to private visit methods. | |||||
2369 | friend class llvm::InstVisitor<AllocaSliceRewriter, bool>; | |||||
2370 | typedef llvm::InstVisitor<AllocaSliceRewriter, bool> Base; | |||||
2371 | ||||||
2372 | const DataLayout &DL; | |||||
2373 | AllocaSlices &AS; | |||||
2374 | SROA &Pass; | |||||
2375 | AllocaInst &OldAI, &NewAI; | |||||
2376 | const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset; | |||||
2377 | Type *NewAllocaTy; | |||||
2378 | ||||||
2379 | // This is a convenience and flag variable that will be null unless the new | |||||
2380 | // alloca's integer operations should be widened to this integer type due to | |||||
2381 | // passing isIntegerWideningViable above. If it is non-null, the desired | |||||
2382 | // integer type will be stored here for easy access during rewriting. | |||||
2383 | IntegerType *IntTy; | |||||
2384 | ||||||
2385 | // If we are rewriting an alloca partition which can be written as pure | |||||
2386 | // vector operations, we stash extra information here. When VecTy is | |||||
2387 | // non-null, we have some strict guarantees about the rewritten alloca: | |||||
2388 | // - The new alloca is exactly the size of the vector type here. | |||||
2389 | // - The accesses all either map to the entire vector or to a single | |||||
2390 | // element. | |||||
2391 | // - The set of accessing instructions is only one of those handled above | |||||
2392 | // in isVectorPromotionViable. Generally these are the same access kinds | |||||
2393 | // which are promotable via mem2reg. | |||||
2394 | VectorType *VecTy; | |||||
2395 | Type *ElementTy; | |||||
2396 | uint64_t ElementSize; | |||||
2397 | ||||||
2398 | // The original offset of the slice currently being rewritten relative to | |||||
2399 | // the original alloca. | |||||
2400 | uint64_t BeginOffset, EndOffset; | |||||
2401 | // The new offsets of the slice currently being rewritten relative to the | |||||
2402 | // original alloca. | |||||
2403 | uint64_t NewBeginOffset, NewEndOffset; | |||||
2404 | ||||||
2405 | uint64_t SliceSize; | |||||
2406 | bool IsSplittable; | |||||
2407 | bool IsSplit; | |||||
2408 | Use *OldUse; | |||||
2409 | Instruction *OldPtr; | |||||
2410 | ||||||
2411 | // Track post-rewrite users which are PHI nodes and Selects. | |||||
2412 | SmallPtrSetImpl<PHINode *> &PHIUsers; | |||||
2413 | SmallPtrSetImpl<SelectInst *> &SelectUsers; | |||||
2414 | ||||||
2415 | // Utility IR builder, whose name prefix is setup for each visited use, and | |||||
2416 | // the insertion point is set to point to the user. | |||||
2417 | IRBuilderTy IRB; | |||||
2418 | ||||||
2419 | public: | |||||
  /// \brief Construct a rewriter for the slices of \p OldAI that fall in the
  /// partition being rewritten into \p NewAI.
  ///
  /// At most one of integer widening (\p IsIntegerPromotable) and vector
  /// promotion (\p PromotableVecTy) may be enabled; the trailing assert
  /// enforces this. PHI and select users encountered during rewriting are
  /// collected into \p PHIUsers and \p SelectUsers for later speculation.
  AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
                      AllocaInst &OldAI, AllocaInst &NewAI,
                      uint64_t NewAllocaBeginOffset,
                      uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
                      VectorType *PromotableVecTy,
                      SmallPtrSetImpl<PHINode *> &PHIUsers,
                      SmallPtrSetImpl<SelectInst *> &SelectUsers)
      : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
        NewAllocaBeginOffset(NewAllocaBeginOffset),
        NewAllocaEndOffset(NewAllocaEndOffset),
        NewAllocaTy(NewAI.getAllocatedType()),
        // When widening integers, cache the integer type whose bit width
        // matches the new alloca's allocated type.
        IntTy(IsIntegerPromotable
                  ? Type::getIntNTy(
                        NewAI.getContext(),
                        DL.getTypeSizeInBits(NewAI.getAllocatedType()))
                  : nullptr),
        VecTy(PromotableVecTy),
        ElementTy(VecTy ? VecTy->getElementType() : nullptr),
        // ElementSize is in bytes; viability checking guarantees the element
        // bit size is a multiple of 8 (asserted below).
        ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
        BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
        OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
        IRB(NewAI.getContext(), ConstantFolder()) {
    if (VecTy) {
      assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&(((DL.getTypeSizeInBits(ElementTy) % 8) == 0 && "Only multiple-of-8 sized vector elements are viable" ) ? static_cast<void> (0) : __assert_fail ("(DL.getTypeSizeInBits(ElementTy) % 8) == 0 && \"Only multiple-of-8 sized vector elements are viable\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2444, __PRETTY_FUNCTION__))
             "Only multiple-of-8 sized vector elements are viable")(((DL.getTypeSizeInBits(ElementTy) % 8) == 0 && "Only multiple-of-8 sized vector elements are viable" ) ? static_cast<void> (0) : __assert_fail ("(DL.getTypeSizeInBits(ElementTy) % 8) == 0 && \"Only multiple-of-8 sized vector elements are viable\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2444, __PRETTY_FUNCTION__));
      ++NumVectorized;
    }
    // Integer widening and vector promotion are mutually exclusive.
    assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy))(((!IntTy && !VecTy) || (IntTy && !VecTy) || ( !IntTy && VecTy)) ? static_cast<void> (0) : __assert_fail ("(!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy)" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2447, __PRETTY_FUNCTION__));
  }
2449 | ||||||
2450 | bool visit(AllocaSlices::const_iterator I) { | |||||
2451 | bool CanSROA = true; | |||||
2452 | BeginOffset = I->beginOffset(); | |||||
2453 | EndOffset = I->endOffset(); | |||||
2454 | IsSplittable = I->isSplittable(); | |||||
2455 | IsSplit = | |||||
2456 | BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset; | |||||
2457 | DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " rewriting " << (IsSplit ? "split " : ""); } } while (0); | |||||
2458 | DEBUG(AS.printSlice(dbgs(), I, ""))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { AS.printSlice(dbgs(), I, ""); } } while (0); | |||||
2459 | DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "\n"; } } while (0); | |||||
2460 | ||||||
2461 | // Compute the intersecting offset range. | |||||
2462 | assert(BeginOffset < NewAllocaEndOffset)((BeginOffset < NewAllocaEndOffset) ? static_cast<void> (0) : __assert_fail ("BeginOffset < NewAllocaEndOffset", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2462, __PRETTY_FUNCTION__)); | |||||
2463 | assert(EndOffset > NewAllocaBeginOffset)((EndOffset > NewAllocaBeginOffset) ? static_cast<void> (0) : __assert_fail ("EndOffset > NewAllocaBeginOffset", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2463, __PRETTY_FUNCTION__)); | |||||
2464 | NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset); | |||||
2465 | NewEndOffset = std::min(EndOffset, NewAllocaEndOffset); | |||||
2466 | ||||||
2467 | SliceSize = NewEndOffset - NewBeginOffset; | |||||
2468 | ||||||
2469 | OldUse = I->getUse(); | |||||
2470 | OldPtr = cast<Instruction>(OldUse->get()); | |||||
2471 | ||||||
2472 | Instruction *OldUserI = cast<Instruction>(OldUse->getUser()); | |||||
2473 | IRB.SetInsertPoint(OldUserI); | |||||
2474 | IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc()); | |||||
2475 | IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + "."); | |||||
2476 | ||||||
2477 | CanSROA &= visit(cast<Instruction>(OldUse->getUser())); | |||||
2478 | if (VecTy || IntTy) | |||||
2479 | assert(CanSROA)((CanSROA) ? static_cast<void> (0) : __assert_fail ("CanSROA" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2479, __PRETTY_FUNCTION__)); | |||||
2480 | return CanSROA; | |||||
2481 | } | |||||
2482 | ||||||
2483 | private: | |||||
2484 | // Make sure the other visit overloads are visible. | |||||
2485 | using Base::visit; | |||||
2486 | ||||||
2487 | // Every instruction which can end up as a user must have a rewrite rule. | |||||
2488 | bool visitInstruction(Instruction &I) { | |||||
2489 | DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " !!!! Cannot rewrite: " << I << "\n"; } } while (0); | |||||
2490 | llvm_unreachable("No rewrite rule for this instruction!")::llvm::llvm_unreachable_internal("No rewrite rule for this instruction!" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2490); | |||||
2491 | } | |||||
2492 | ||||||
  /// \brief Compute a pointer into the new alloca for the current slice,
  /// cast to \p PointerTy.
  ///
  /// In debug builds the pointer inherits a readable name derived from the
  /// old pointer with SROA decorations stripped.
  Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
    // Note that the offset computation can use BeginOffset or NewBeginOffset
    // interchangeably for unsplit slices.
    assert(IsSplit || BeginOffset == NewBeginOffset)((IsSplit || BeginOffset == NewBeginOffset) ? static_cast< void> (0) : __assert_fail ("IsSplit || BeginOffset == NewBeginOffset" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2496, __PRETTY_FUNCTION__));
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

#ifndef NDEBUG
    StringRef OldName = OldPtr->getName();
    // Skip through the last '.sroa.' component of the name.
    size_t LastSROAPrefix = OldName.rfind(".sroa.");
    if (LastSROAPrefix != StringRef::npos) {
      OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
      // Look for an SROA slice index.
      size_t IndexEnd = OldName.find_first_not_of("0123456789");
      if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
        // Strip the index and look for the offset.
        OldName = OldName.substr(IndexEnd + 1);
        size_t OffsetEnd = OldName.find_first_not_of("0123456789");
        if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
          // Strip the offset.
          OldName = OldName.substr(OffsetEnd + 1);
      }
    }
    // Strip any SROA suffixes as well.
    OldName = OldName.substr(0, OldName.find(".sroa_"));
#endif

    return getAdjustedPtr(IRB, DL, &NewAI,
                          APInt(DL.getPointerSizeInBits(), Offset), PointerTy,
#ifndef NDEBUG
                          Twine(OldName) + "."
#else
                          Twine()
#endif
                          );
  }
2529 | ||||||
2530 | /// \brief Compute suitable alignment to access this slice of the *new* | |||||
2531 | /// alloca. | |||||
2532 | /// | |||||
2533 | /// You can optionally pass a type to this routine and if that type's ABI | |||||
2534 | /// alignment is itself suitable, this will return zero. | |||||
2535 | unsigned getSliceAlign(Type *Ty = nullptr) { | |||||
2536 | unsigned NewAIAlign = NewAI.getAlignment(); | |||||
2537 | if (!NewAIAlign) | |||||
2538 | NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType()); | |||||
2539 | unsigned Align = | |||||
2540 | MinAlign(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset); | |||||
2541 | return (Ty && Align == DL.getABITypeAlignment(Ty)) ? 0 : Align; | |||||
2542 | } | |||||
2543 | ||||||
2544 | unsigned getIndex(uint64_t Offset) { | |||||
2545 | assert(VecTy && "Can only call getIndex when rewriting a vector")((VecTy && "Can only call getIndex when rewriting a vector" ) ? static_cast<void> (0) : __assert_fail ("VecTy && \"Can only call getIndex when rewriting a vector\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2545, __PRETTY_FUNCTION__)); | |||||
2546 | uint64_t RelOffset = Offset - NewAllocaBeginOffset; | |||||
2547 | assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds")((RelOffset / ElementSize < (4294967295U) && "Index out of bounds" ) ? static_cast<void> (0) : __assert_fail ("RelOffset / ElementSize < (4294967295U) && \"Index out of bounds\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2547, __PRETTY_FUNCTION__)); | |||||
2548 | uint32_t Index = RelOffset / ElementSize; | |||||
2549 | assert(Index * ElementSize == RelOffset)((Index * ElementSize == RelOffset) ? static_cast<void> (0) : __assert_fail ("Index * ElementSize == RelOffset", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2549, __PRETTY_FUNCTION__)); | |||||
2550 | return Index; | |||||
2551 | } | |||||
2552 | ||||||
2553 | void deleteIfTriviallyDead(Value *V) { | |||||
2554 | Instruction *I = cast<Instruction>(V); | |||||
2555 | if (isInstructionTriviallyDead(I)) | |||||
2556 | Pass.DeadInsts.insert(I); | |||||
2557 | } | |||||
2558 | ||||||
2559 | Value *rewriteVectorizedLoadInst() { | |||||
2560 | unsigned BeginIndex = getIndex(NewBeginOffset); | |||||
2561 | unsigned EndIndex = getIndex(NewEndOffset); | |||||
2562 | assert(EndIndex > BeginIndex && "Empty vector!")((EndIndex > BeginIndex && "Empty vector!") ? static_cast <void> (0) : __assert_fail ("EndIndex > BeginIndex && \"Empty vector!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2562, __PRETTY_FUNCTION__)); | |||||
2563 | ||||||
2564 | Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load"); | |||||
2565 | return extractVector(IRB, V, BeginIndex, EndIndex, "vec"); | |||||
2566 | } | |||||
2567 | ||||||
2568 | Value *rewriteIntegerLoad(LoadInst &LI) { | |||||
2569 | assert(IntTy && "We cannot insert an integer to the alloca")((IntTy && "We cannot insert an integer to the alloca" ) ? static_cast<void> (0) : __assert_fail ("IntTy && \"We cannot insert an integer to the alloca\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2569, __PRETTY_FUNCTION__)); | |||||
2570 | assert(!LI.isVolatile())((!LI.isVolatile()) ? static_cast<void> (0) : __assert_fail ("!LI.isVolatile()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2570, __PRETTY_FUNCTION__)); | |||||
2571 | Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load"); | |||||
2572 | V = convertValue(DL, IRB, V, IntTy); | |||||
2573 | assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset")((NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset" ) ? static_cast<void> (0) : __assert_fail ("NewBeginOffset >= NewAllocaBeginOffset && \"Out of bounds offset\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2573, __PRETTY_FUNCTION__)); | |||||
2574 | uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; | |||||
2575 | if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) | |||||
2576 | V = extractInteger(DL, IRB, V, cast<IntegerType>(LI.getType()), Offset, | |||||
2577 | "extract"); | |||||
2578 | return V; | |||||
2579 | } | |||||
2580 | ||||||
2581 | bool visitLoadInst(LoadInst &LI) { | |||||
2582 | DEBUG(dbgs() << " original: " << LI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " original: " << LI << "\n"; } } while (0); | |||||
2583 | Value *OldOp = LI.getOperand(0); | |||||
2584 | assert(OldOp == OldPtr)((OldOp == OldPtr) ? static_cast<void> (0) : __assert_fail ("OldOp == OldPtr", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2584, __PRETTY_FUNCTION__)); | |||||
2585 | ||||||
2586 | Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8) | |||||
| ||||||
2587 | : LI.getType(); | |||||
2588 | bool IsPtrAdjusted = false; | |||||
2589 | Value *V; | |||||
2590 | if (VecTy) { | |||||
2591 | V = rewriteVectorizedLoadInst(); | |||||
2592 | } else if (IntTy && LI.getType()->isIntegerTy()) { | |||||
2593 | V = rewriteIntegerLoad(LI); | |||||
2594 | } else if (NewBeginOffset == NewAllocaBeginOffset && | |||||
2595 | canConvertValue(DL, NewAllocaTy, LI.getType())) { | |||||
2596 | V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), LI.isVolatile(), | |||||
2597 | LI.getName()); | |||||
2598 | } else { | |||||
2599 | Type *LTy = TargetTy->getPointerTo(); | |||||
2600 | V = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy), | |||||
2601 | getSliceAlign(TargetTy), LI.isVolatile(), | |||||
2602 | LI.getName()); | |||||
2603 | IsPtrAdjusted = true; | |||||
2604 | } | |||||
2605 | V = convertValue(DL, IRB, V, TargetTy); | |||||
2606 | ||||||
2607 | if (IsSplit) { | |||||
2608 | assert(!LI.isVolatile())((!LI.isVolatile()) ? static_cast<void> (0) : __assert_fail ("!LI.isVolatile()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2608, __PRETTY_FUNCTION__)); | |||||
2609 | assert(LI.getType()->isIntegerTy() &&((LI.getType()->isIntegerTy() && "Only integer type loads and stores are split" ) ? static_cast<void> (0) : __assert_fail ("LI.getType()->isIntegerTy() && \"Only integer type loads and stores are split\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2610, __PRETTY_FUNCTION__)) | |||||
2610 | "Only integer type loads and stores are split")((LI.getType()->isIntegerTy() && "Only integer type loads and stores are split" ) ? static_cast<void> (0) : __assert_fail ("LI.getType()->isIntegerTy() && \"Only integer type loads and stores are split\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2610, __PRETTY_FUNCTION__)); | |||||
2611 | assert(SliceSize < DL.getTypeStoreSize(LI.getType()) &&((SliceSize < DL.getTypeStoreSize(LI.getType()) && "Split load isn't smaller than original load") ? static_cast <void> (0) : __assert_fail ("SliceSize < DL.getTypeStoreSize(LI.getType()) && \"Split load isn't smaller than original load\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2612, __PRETTY_FUNCTION__)) | |||||
2612 | "Split load isn't smaller than original load")((SliceSize < DL.getTypeStoreSize(LI.getType()) && "Split load isn't smaller than original load") ? static_cast <void> (0) : __assert_fail ("SliceSize < DL.getTypeStoreSize(LI.getType()) && \"Split load isn't smaller than original load\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2612, __PRETTY_FUNCTION__)); | |||||
2613 | assert(LI.getType()->getIntegerBitWidth() ==((LI.getType()->getIntegerBitWidth() == DL.getTypeStoreSizeInBits (LI.getType()) && "Non-byte-multiple bit width") ? static_cast <void> (0) : __assert_fail ("LI.getType()->getIntegerBitWidth() == DL.getTypeStoreSizeInBits(LI.getType()) && \"Non-byte-multiple bit width\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2615, __PRETTY_FUNCTION__)) | |||||
2614 | DL.getTypeStoreSizeInBits(LI.getType()) &&((LI.getType()->getIntegerBitWidth() == DL.getTypeStoreSizeInBits (LI.getType()) && "Non-byte-multiple bit width") ? static_cast <void> (0) : __assert_fail ("LI.getType()->getIntegerBitWidth() == DL.getTypeStoreSizeInBits(LI.getType()) && \"Non-byte-multiple bit width\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2615, __PRETTY_FUNCTION__)) | |||||
2615 | "Non-byte-multiple bit width")((LI.getType()->getIntegerBitWidth() == DL.getTypeStoreSizeInBits (LI.getType()) && "Non-byte-multiple bit width") ? static_cast <void> (0) : __assert_fail ("LI.getType()->getIntegerBitWidth() == DL.getTypeStoreSizeInBits(LI.getType()) && \"Non-byte-multiple bit width\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2615, __PRETTY_FUNCTION__)); | |||||
2616 | // Move the insertion point just past the load so that we can refer to it. | |||||
2617 | IRB.SetInsertPoint(std::next(BasicBlock::iterator(&LI))); | |||||
2618 | // Create a placeholder value with the same type as LI to use as the | |||||
2619 | // basis for the new value. This allows us to replace the uses of LI with | |||||
2620 | // the computed value, and then replace the placeholder with LI, leaving | |||||
2621 | // LI only used for this computation. | |||||
2622 | Value *Placeholder = | |||||
2623 | new LoadInst(UndefValue::get(LI.getType()->getPointerTo())); | |||||
2624 | V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset, | |||||
2625 | "insert"); | |||||
2626 | LI.replaceAllUsesWith(V); | |||||
2627 | Placeholder->replaceAllUsesWith(&LI); | |||||
2628 | delete Placeholder; | |||||
2629 | } else { | |||||
2630 | LI.replaceAllUsesWith(V); | |||||
2631 | } | |||||
2632 | ||||||
2633 | Pass.DeadInsts.insert(&LI); | |||||
2634 | deleteIfTriviallyDead(OldOp); | |||||
2635 | DEBUG(dbgs() << " to: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << *V << "\n"; } } while (0); | |||||
| ||||||
2636 | return !LI.isVolatile() && !IsPtrAdjusted; | |||||
2637 | } | |||||
2638 | ||||||
2639 | bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) { | |||||
2640 | if (V->getType() != VecTy) { | |||||
2641 | unsigned BeginIndex = getIndex(NewBeginOffset); | |||||
2642 | unsigned EndIndex = getIndex(NewEndOffset); | |||||
2643 | assert(EndIndex > BeginIndex && "Empty vector!")((EndIndex > BeginIndex && "Empty vector!") ? static_cast <void> (0) : __assert_fail ("EndIndex > BeginIndex && \"Empty vector!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2643, __PRETTY_FUNCTION__)); | |||||
2644 | unsigned NumElements = EndIndex - BeginIndex; | |||||
2645 | assert(NumElements <= VecTy->getNumElements() && "Too many elements!")((NumElements <= VecTy->getNumElements() && "Too many elements!" ) ? static_cast<void> (0) : __assert_fail ("NumElements <= VecTy->getNumElements() && \"Too many elements!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2645, __PRETTY_FUNCTION__)); | |||||
2646 | Type *SliceTy = (NumElements == 1) | |||||
2647 | ? ElementTy | |||||
2648 | : VectorType::get(ElementTy, NumElements); | |||||
2649 | if (V->getType() != SliceTy) | |||||
2650 | V = convertValue(DL, IRB, V, SliceTy); | |||||
2651 | ||||||
2652 | // Mix in the existing elements. | |||||
2653 | Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load"); | |||||
2654 | V = insertVector(IRB, Old, V, BeginIndex, "vec"); | |||||
2655 | } | |||||
2656 | StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); | |||||
2657 | Pass.DeadInsts.insert(&SI); | |||||
2658 | ||||||
2659 | (void)Store; | |||||
2660 | DEBUG(dbgs() << " to: " << *Store << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << *Store << "\n"; } } while (0); | |||||
2661 | return true; | |||||
2662 | } | |||||
2663 | ||||||
2664 | bool rewriteIntegerStore(Value *V, StoreInst &SI) { | |||||
2665 | assert(IntTy && "We cannot extract an integer from the alloca")((IntTy && "We cannot extract an integer from the alloca" ) ? static_cast<void> (0) : __assert_fail ("IntTy && \"We cannot extract an integer from the alloca\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2665, __PRETTY_FUNCTION__)); | |||||
2666 | assert(!SI.isVolatile())((!SI.isVolatile()) ? static_cast<void> (0) : __assert_fail ("!SI.isVolatile()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2666, __PRETTY_FUNCTION__)); | |||||
2667 | if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { | |||||
2668 | Value *Old = | |||||
2669 | IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload"); | |||||
2670 | Old = convertValue(DL, IRB, Old, IntTy); | |||||
2671 | assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset")((BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset" ) ? static_cast<void> (0) : __assert_fail ("BeginOffset >= NewAllocaBeginOffset && \"Out of bounds offset\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2671, __PRETTY_FUNCTION__)); | |||||
2672 | uint64_t Offset = BeginOffset - NewAllocaBeginOffset; | |||||
2673 | V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert"); | |||||
2674 | } | |||||
2675 | V = convertValue(DL, IRB, V, NewAllocaTy); | |||||
2676 | StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); | |||||
2677 | Pass.DeadInsts.insert(&SI); | |||||
2678 | (void)Store; | |||||
2679 | DEBUG(dbgs() << " to: " << *Store << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << *Store << "\n"; } } while (0); | |||||
2680 | return true; | |||||
2681 | } | |||||
2682 | ||||||
2683 | bool visitStoreInst(StoreInst &SI) { | |||||
2684 | DEBUG(dbgs() << " original: " << SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " original: " << SI << "\n"; } } while (0); | |||||
2685 | Value *OldOp = SI.getOperand(1); | |||||
2686 | assert(OldOp == OldPtr)((OldOp == OldPtr) ? static_cast<void> (0) : __assert_fail ("OldOp == OldPtr", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2686, __PRETTY_FUNCTION__)); | |||||
2687 | ||||||
2688 | Value *V = SI.getValueOperand(); | |||||
2689 | ||||||
2690 | // Strip all inbounds GEPs and pointer casts to try to dig out any root | |||||
2691 | // alloca that should be re-examined after promoting this alloca. | |||||
2692 | if (V->getType()->isPointerTy()) | |||||
2693 | if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) | |||||
2694 | Pass.PostPromotionWorklist.insert(AI); | |||||
2695 | ||||||
2696 | if (SliceSize < DL.getTypeStoreSize(V->getType())) { | |||||
2697 | assert(!SI.isVolatile())((!SI.isVolatile()) ? static_cast<void> (0) : __assert_fail ("!SI.isVolatile()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2697, __PRETTY_FUNCTION__)); | |||||
2698 | assert(V->getType()->isIntegerTy() &&((V->getType()->isIntegerTy() && "Only integer type loads and stores are split" ) ? static_cast<void> (0) : __assert_fail ("V->getType()->isIntegerTy() && \"Only integer type loads and stores are split\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2699, __PRETTY_FUNCTION__)) | |||||
2699 | "Only integer type loads and stores are split")((V->getType()->isIntegerTy() && "Only integer type loads and stores are split" ) ? static_cast<void> (0) : __assert_fail ("V->getType()->isIntegerTy() && \"Only integer type loads and stores are split\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2699, __PRETTY_FUNCTION__)); | |||||
2700 | assert(V->getType()->getIntegerBitWidth() ==((V->getType()->getIntegerBitWidth() == DL.getTypeStoreSizeInBits (V->getType()) && "Non-byte-multiple bit width") ? static_cast<void> (0) : __assert_fail ("V->getType()->getIntegerBitWidth() == DL.getTypeStoreSizeInBits(V->getType()) && \"Non-byte-multiple bit width\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2702, __PRETTY_FUNCTION__)) | |||||
2701 | DL.getTypeStoreSizeInBits(V->getType()) &&((V->getType()->getIntegerBitWidth() == DL.getTypeStoreSizeInBits (V->getType()) && "Non-byte-multiple bit width") ? static_cast<void> (0) : __assert_fail ("V->getType()->getIntegerBitWidth() == DL.getTypeStoreSizeInBits(V->getType()) && \"Non-byte-multiple bit width\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2702, __PRETTY_FUNCTION__)) | |||||
2702 | "Non-byte-multiple bit width")((V->getType()->getIntegerBitWidth() == DL.getTypeStoreSizeInBits (V->getType()) && "Non-byte-multiple bit width") ? static_cast<void> (0) : __assert_fail ("V->getType()->getIntegerBitWidth() == DL.getTypeStoreSizeInBits(V->getType()) && \"Non-byte-multiple bit width\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2702, __PRETTY_FUNCTION__)); | |||||
2703 | IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8); | |||||
2704 | V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset, | |||||
2705 | "extract"); | |||||
2706 | } | |||||
2707 | ||||||
2708 | if (VecTy) | |||||
2709 | return rewriteVectorizedStoreInst(V, SI, OldOp); | |||||
2710 | if (IntTy && V->getType()->isIntegerTy()) | |||||
2711 | return rewriteIntegerStore(V, SI); | |||||
2712 | ||||||
2713 | StoreInst *NewSI; | |||||
2714 | if (NewBeginOffset == NewAllocaBeginOffset && | |||||
2715 | NewEndOffset == NewAllocaEndOffset && | |||||
2716 | canConvertValue(DL, V->getType(), NewAllocaTy)) { | |||||
2717 | V = convertValue(DL, IRB, V, NewAllocaTy); | |||||
2718 | NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), | |||||
2719 | SI.isVolatile()); | |||||
2720 | } else { | |||||
2721 | Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo()); | |||||
2722 | NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()), | |||||
2723 | SI.isVolatile()); | |||||
2724 | } | |||||
2725 | (void)NewSI; | |||||
2726 | Pass.DeadInsts.insert(&SI); | |||||
2727 | deleteIfTriviallyDead(OldOp); | |||||
2728 | ||||||
2729 | DEBUG(dbgs() << " to: " << *NewSI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << *NewSI << "\n"; } } while (0); | |||||
2730 | return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); | |||||
2731 | } | |||||
2732 | ||||||
2733 | /// \brief Compute an integer value from splatting an i8 across the given | |||||
2734 | /// number of bytes. | |||||
2735 | /// | |||||
2736 | /// Note that this routine assumes an i8 is a byte. If that isn't true, don't | |||||
2737 | /// call this routine. | |||||
2738 | /// FIXME: Heed the advice above. | |||||
2739 | /// | |||||
2740 | /// \param V The i8 value to splat. | |||||
2741 | /// \param Size The number of bytes in the output (assuming i8 is one byte) | |||||
2742 | Value *getIntegerSplat(Value *V, unsigned Size) { | |||||
2743 | assert(Size > 0 && "Expected a positive number of bytes.")((Size > 0 && "Expected a positive number of bytes." ) ? static_cast<void> (0) : __assert_fail ("Size > 0 && \"Expected a positive number of bytes.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2743, __PRETTY_FUNCTION__)); | |||||
2744 | IntegerType *VTy = cast<IntegerType>(V->getType()); | |||||
2745 | assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte")((VTy->getBitWidth() == 8 && "Expected an i8 value for the byte" ) ? static_cast<void> (0) : __assert_fail ("VTy->getBitWidth() == 8 && \"Expected an i8 value for the byte\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2745, __PRETTY_FUNCTION__)); | |||||
2746 | if (Size == 1) | |||||
2747 | return V; | |||||
2748 | ||||||
2749 | Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8); | |||||
2750 | V = IRB.CreateMul( | |||||
2751 | IRB.CreateZExt(V, SplatIntTy, "zext"), | |||||
2752 | ConstantExpr::getUDiv( | |||||
2753 | Constant::getAllOnesValue(SplatIntTy), | |||||
2754 | ConstantExpr::getZExt(Constant::getAllOnesValue(V->getType()), | |||||
2755 | SplatIntTy)), | |||||
2756 | "isplat"); | |||||
2757 | return V; | |||||
2758 | } | |||||
2759 | ||||||
2760 | /// \brief Compute a vector splat for a given element value. | |||||
2761 | Value *getVectorSplat(Value *V, unsigned NumElements) { | |||||
2762 | V = IRB.CreateVectorSplat(NumElements, V, "vsplat"); | |||||
2763 | DEBUG(dbgs() << " splat: " << *V << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " splat: " << *V << "\n"; } } while (0); | |||||
2764 | return V; | |||||
2765 | } | |||||
2766 | ||||||
2767 | bool visitMemSetInst(MemSetInst &II) { | |||||
2768 | DEBUG(dbgs() << " original: " << II << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " original: " << II << "\n"; } } while (0); | |||||
2769 | assert(II.getRawDest() == OldPtr)((II.getRawDest() == OldPtr) ? static_cast<void> (0) : __assert_fail ("II.getRawDest() == OldPtr", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2769, __PRETTY_FUNCTION__)); | |||||
2770 | ||||||
2771 | // If the memset has a variable size, it cannot be split, just adjust the | |||||
2772 | // pointer to the new alloca. | |||||
2773 | if (!isa<Constant>(II.getLength())) { | |||||
2774 | assert(!IsSplit)((!IsSplit) ? static_cast<void> (0) : __assert_fail ("!IsSplit" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2774, __PRETTY_FUNCTION__)); | |||||
2775 | assert(NewBeginOffset == BeginOffset)((NewBeginOffset == BeginOffset) ? static_cast<void> (0 ) : __assert_fail ("NewBeginOffset == BeginOffset", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2775, __PRETTY_FUNCTION__)); | |||||
2776 | II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType())); | |||||
2777 | Type *CstTy = II.getAlignmentCst()->getType(); | |||||
2778 | II.setAlignment(ConstantInt::get(CstTy, getSliceAlign())); | |||||
2779 | ||||||
2780 | deleteIfTriviallyDead(OldPtr); | |||||
2781 | return false; | |||||
2782 | } | |||||
2783 | ||||||
2784 | // Record this instruction for deletion. | |||||
2785 | Pass.DeadInsts.insert(&II); | |||||
2786 | ||||||
2787 | Type *AllocaTy = NewAI.getAllocatedType(); | |||||
2788 | Type *ScalarTy = AllocaTy->getScalarType(); | |||||
2789 | ||||||
2790 | // If this doesn't map cleanly onto the alloca type, and that type isn't | |||||
2791 | // a single value type, just emit a memset. | |||||
2792 | if (!VecTy && !IntTy && | |||||
2793 | (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset || | |||||
2794 | SliceSize != DL.getTypeStoreSize(AllocaTy) || | |||||
2795 | !AllocaTy->isSingleValueType() || | |||||
2796 | !DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy)) || | |||||
2797 | DL.getTypeSizeInBits(ScalarTy) % 8 != 0)) { | |||||
2798 | Type *SizeTy = II.getLength()->getType(); | |||||
2799 | Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset); | |||||
2800 | CallInst *New = IRB.CreateMemSet( | |||||
2801 | getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size, | |||||
2802 | getSliceAlign(), II.isVolatile()); | |||||
2803 | (void)New; | |||||
2804 | DEBUG(dbgs() << " to: " << *New << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << *New << "\n"; } } while (0); | |||||
2805 | return false; | |||||
2806 | } | |||||
2807 | ||||||
2808 | // If we can represent this as a simple value, we have to build the actual | |||||
2809 | // value to store, which requires expanding the byte present in memset to | |||||
2810 | // a sensible representation for the alloca type. This is essentially | |||||
2811 | // splatting the byte to a sufficiently wide integer, splatting it across | |||||
2812 | // any desired vector width, and bitcasting to the final type. | |||||
2813 | Value *V; | |||||
2814 | ||||||
2815 | if (VecTy) { | |||||
2816 | // If this is a memset of a vectorized alloca, insert it. | |||||
2817 | assert(ElementTy == ScalarTy)((ElementTy == ScalarTy) ? static_cast<void> (0) : __assert_fail ("ElementTy == ScalarTy", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2817, __PRETTY_FUNCTION__)); | |||||
2818 | ||||||
2819 | unsigned BeginIndex = getIndex(NewBeginOffset); | |||||
2820 | unsigned EndIndex = getIndex(NewEndOffset); | |||||
2821 | assert(EndIndex > BeginIndex && "Empty vector!")((EndIndex > BeginIndex && "Empty vector!") ? static_cast <void> (0) : __assert_fail ("EndIndex > BeginIndex && \"Empty vector!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2821, __PRETTY_FUNCTION__)); | |||||
2822 | unsigned NumElements = EndIndex - BeginIndex; | |||||
2823 | assert(NumElements <= VecTy->getNumElements() && "Too many elements!")((NumElements <= VecTy->getNumElements() && "Too many elements!" ) ? static_cast<void> (0) : __assert_fail ("NumElements <= VecTy->getNumElements() && \"Too many elements!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2823, __PRETTY_FUNCTION__)); | |||||
2824 | ||||||
2825 | Value *Splat = | |||||
2826 | getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ElementTy) / 8); | |||||
2827 | Splat = convertValue(DL, IRB, Splat, ElementTy); | |||||
2828 | if (NumElements > 1) | |||||
2829 | Splat = getVectorSplat(Splat, NumElements); | |||||
2830 | ||||||
2831 | Value *Old = | |||||
2832 | IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload"); | |||||
2833 | V = insertVector(IRB, Old, Splat, BeginIndex, "vec"); | |||||
2834 | } else if (IntTy) { | |||||
2835 | // If this is a memset on an alloca where we can widen stores, insert the | |||||
2836 | // set integer. | |||||
2837 | assert(!II.isVolatile())((!II.isVolatile()) ? static_cast<void> (0) : __assert_fail ("!II.isVolatile()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2837, __PRETTY_FUNCTION__)); | |||||
2838 | ||||||
2839 | uint64_t Size = NewEndOffset - NewBeginOffset; | |||||
2840 | V = getIntegerSplat(II.getValue(), Size); | |||||
2841 | ||||||
2842 | if (IntTy && (BeginOffset != NewAllocaBeginOffset || | |||||
2843 | EndOffset != NewAllocaBeginOffset)) { | |||||
2844 | Value *Old = | |||||
2845 | IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload"); | |||||
2846 | Old = convertValue(DL, IRB, Old, IntTy); | |||||
2847 | uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; | |||||
2848 | V = insertInteger(DL, IRB, Old, V, Offset, "insert"); | |||||
2849 | } else { | |||||
2850 | assert(V->getType() == IntTy &&((V->getType() == IntTy && "Wrong type for an alloca wide integer!" ) ? static_cast<void> (0) : __assert_fail ("V->getType() == IntTy && \"Wrong type for an alloca wide integer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2851, __PRETTY_FUNCTION__)) | |||||
2851 | "Wrong type for an alloca wide integer!")((V->getType() == IntTy && "Wrong type for an alloca wide integer!" ) ? static_cast<void> (0) : __assert_fail ("V->getType() == IntTy && \"Wrong type for an alloca wide integer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2851, __PRETTY_FUNCTION__)); | |||||
2852 | } | |||||
2853 | V = convertValue(DL, IRB, V, AllocaTy); | |||||
2854 | } else { | |||||
2855 | // Established these invariants above. | |||||
2856 | assert(NewBeginOffset == NewAllocaBeginOffset)((NewBeginOffset == NewAllocaBeginOffset) ? static_cast<void > (0) : __assert_fail ("NewBeginOffset == NewAllocaBeginOffset" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2856, __PRETTY_FUNCTION__)); | |||||
2857 | assert(NewEndOffset == NewAllocaEndOffset)((NewEndOffset == NewAllocaEndOffset) ? static_cast<void> (0) : __assert_fail ("NewEndOffset == NewAllocaEndOffset", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2857, __PRETTY_FUNCTION__)); | |||||
2858 | ||||||
2859 | V = getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ScalarTy) / 8); | |||||
2860 | if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy)) | |||||
2861 | V = getVectorSplat(V, AllocaVecTy->getNumElements()); | |||||
2862 | ||||||
2863 | V = convertValue(DL, IRB, V, AllocaTy); | |||||
2864 | } | |||||
2865 | ||||||
2866 | Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), | |||||
2867 | II.isVolatile()); | |||||
2868 | (void)New; | |||||
2869 | DEBUG(dbgs() << " to: " << *New << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << *New << "\n"; } } while (0); | |||||
2870 | return !II.isVolatile(); | |||||
2871 | } | |||||
2872 | ||||||
2873 | bool visitMemTransferInst(MemTransferInst &II) { | |||||
2874 | // Rewriting of memory transfer instructions can be a bit tricky. We break | |||||
2875 | // them into two categories: split intrinsics and unsplit intrinsics. | |||||
2876 | ||||||
2877 | DEBUG(dbgs() << " original: " << II << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " original: " << II << "\n"; } } while (0); | |||||
2878 | ||||||
2879 | bool IsDest = &II.getRawDestUse() == OldUse; | |||||
2880 | assert((IsDest && II.getRawDest() == OldPtr) ||(((IsDest && II.getRawDest() == OldPtr) || (!IsDest && II.getRawSource() == OldPtr)) ? static_cast<void> (0) : __assert_fail ("(IsDest && II.getRawDest() == OldPtr) || (!IsDest && II.getRawSource() == OldPtr)" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2881, __PRETTY_FUNCTION__)) | |||||
2881 | (!IsDest && II.getRawSource() == OldPtr))(((IsDest && II.getRawDest() == OldPtr) || (!IsDest && II.getRawSource() == OldPtr)) ? static_cast<void> (0) : __assert_fail ("(IsDest && II.getRawDest() == OldPtr) || (!IsDest && II.getRawSource() == OldPtr)" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2881, __PRETTY_FUNCTION__)); | |||||
2882 | ||||||
2883 | unsigned SliceAlign = getSliceAlign(); | |||||
2884 | ||||||
2885 | // For unsplit intrinsics, we simply modify the source and destination | |||||
2886 | // pointers in place. This isn't just an optimization, it is a matter of | |||||
2887 | // correctness. With unsplit intrinsics we may be dealing with transfers | |||||
2888 | // within a single alloca before SROA ran, or with transfers that have | |||||
2889 | // a variable length. We may also be dealing with memmove instead of | |||||
2890 | // memcpy, and so simply updating the pointers is the necessary for us to | |||||
2891 | // update both source and dest of a single call. | |||||
2892 | if (!IsSplittable) { | |||||
2893 | Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); | |||||
2894 | if (IsDest) | |||||
2895 | II.setDest(AdjustedPtr); | |||||
2896 | else | |||||
2897 | II.setSource(AdjustedPtr); | |||||
2898 | ||||||
2899 | if (II.getAlignment() > SliceAlign) { | |||||
2900 | Type *CstTy = II.getAlignmentCst()->getType(); | |||||
2901 | II.setAlignment( | |||||
2902 | ConstantInt::get(CstTy, MinAlign(II.getAlignment(), SliceAlign))); | |||||
2903 | } | |||||
2904 | ||||||
2905 | DEBUG(dbgs() << " to: " << II << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << II << "\n"; } } while (0); | |||||
2906 | deleteIfTriviallyDead(OldPtr); | |||||
2907 | return false; | |||||
2908 | } | |||||
2909 | // For split transfer intrinsics we have an incredibly useful assurance: | |||||
2910 | // the source and destination do not reside within the same alloca, and at | |||||
2911 | // least one of them does not escape. This means that we can replace | |||||
2912 | // memmove with memcpy, and we don't need to worry about all manner of | |||||
2913 | // downsides to splitting and transforming the operations. | |||||
2914 | ||||||
2915 | // If this doesn't map cleanly onto the alloca type, and that type isn't | |||||
2916 | // a single value type, just emit a memcpy. | |||||
2917 | bool EmitMemCpy = | |||||
2918 | !VecTy && !IntTy && | |||||
2919 | (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset || | |||||
2920 | SliceSize != DL.getTypeStoreSize(NewAI.getAllocatedType()) || | |||||
2921 | !NewAI.getAllocatedType()->isSingleValueType()); | |||||
2922 | ||||||
2923 | // If we're just going to emit a memcpy, the alloca hasn't changed, and the | |||||
2924 | // size hasn't been shrunk based on analysis of the viable range, this is | |||||
2925 | // a no-op. | |||||
2926 | if (EmitMemCpy && &OldAI == &NewAI) { | |||||
2927 | // Ensure the start lines up. | |||||
2928 | assert(NewBeginOffset == BeginOffset)((NewBeginOffset == BeginOffset) ? static_cast<void> (0 ) : __assert_fail ("NewBeginOffset == BeginOffset", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2928, __PRETTY_FUNCTION__)); | |||||
2929 | ||||||
2930 | // Rewrite the size as needed. | |||||
2931 | if (NewEndOffset != EndOffset) | |||||
2932 | II.setLength(ConstantInt::get(II.getLength()->getType(), | |||||
2933 | NewEndOffset - NewBeginOffset)); | |||||
2934 | return false; | |||||
2935 | } | |||||
2936 | // Record this instruction for deletion. | |||||
2937 | Pass.DeadInsts.insert(&II); | |||||
2938 | ||||||
2939 | // Strip all inbounds GEPs and pointer casts to try to dig out any root | |||||
2940 | // alloca that should be re-examined after rewriting this instruction. | |||||
2941 | Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); | |||||
2942 | if (AllocaInst *AI = | |||||
2943 | dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) { | |||||
2944 | assert(AI != &OldAI && AI != &NewAI &&((AI != &OldAI && AI != &NewAI && "Splittable transfers cannot reach the same alloca on both ends." ) ? static_cast<void> (0) : __assert_fail ("AI != &OldAI && AI != &NewAI && \"Splittable transfers cannot reach the same alloca on both ends.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2945, __PRETTY_FUNCTION__)) | |||||
2945 | "Splittable transfers cannot reach the same alloca on both ends.")((AI != &OldAI && AI != &NewAI && "Splittable transfers cannot reach the same alloca on both ends." ) ? static_cast<void> (0) : __assert_fail ("AI != &OldAI && AI != &NewAI && \"Splittable transfers cannot reach the same alloca on both ends.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 2945, __PRETTY_FUNCTION__)); | |||||
2946 | Pass.Worklist.insert(AI); | |||||
2947 | } | |||||
2948 | ||||||
2949 | Type *OtherPtrTy = OtherPtr->getType(); | |||||
2950 | unsigned OtherAS = OtherPtrTy->getPointerAddressSpace(); | |||||
2951 | ||||||
2952 | // Compute the relative offset for the other pointer within the transfer. | |||||
2953 | unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS); | |||||
2954 | APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset); | |||||
2955 | unsigned OtherAlign = MinAlign(II.getAlignment() ? II.getAlignment() : 1, | |||||
2956 | OtherOffset.zextOrTrunc(64).getZExtValue()); | |||||
2957 | ||||||
2958 | if (EmitMemCpy) { | |||||
2959 | // Compute the other pointer, folding as much as possible to produce | |||||
2960 | // a single, simple GEP in most cases. | |||||
2961 | OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy, | |||||
2962 | OtherPtr->getName() + "."); | |||||
2963 | ||||||
2964 | Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); | |||||
2965 | Type *SizeTy = II.getLength()->getType(); | |||||
2966 | Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset); | |||||
2967 | ||||||
2968 | CallInst *New = IRB.CreateMemCpy( | |||||
2969 | IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, Size, | |||||
2970 | MinAlign(SliceAlign, OtherAlign), II.isVolatile()); | |||||
2971 | (void)New; | |||||
2972 | DEBUG(dbgs() << " to: " << *New << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << *New << "\n"; } } while (0); | |||||
2973 | return false; | |||||
2974 | } | |||||
2975 | ||||||
2976 | bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset && | |||||
2977 | NewEndOffset == NewAllocaEndOffset; | |||||
2978 | uint64_t Size = NewEndOffset - NewBeginOffset; | |||||
2979 | unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0; | |||||
2980 | unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0; | |||||
2981 | unsigned NumElements = EndIndex - BeginIndex; | |||||
2982 | IntegerType *SubIntTy = | |||||
2983 | IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr; | |||||
2984 | ||||||
2985 | // Reset the other pointer type to match the register type we're going to | |||||
2986 | // use, but using the address space of the original other pointer. | |||||
2987 | if (VecTy && !IsWholeAlloca) { | |||||
2988 | if (NumElements == 1) | |||||
2989 | OtherPtrTy = VecTy->getElementType(); | |||||
2990 | else | |||||
2991 | OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements); | |||||
2992 | ||||||
2993 | OtherPtrTy = OtherPtrTy->getPointerTo(OtherAS); | |||||
2994 | } else if (IntTy && !IsWholeAlloca) { | |||||
2995 | OtherPtrTy = SubIntTy->getPointerTo(OtherAS); | |||||
2996 | } else { | |||||
2997 | OtherPtrTy = NewAllocaTy->getPointerTo(OtherAS); | |||||
2998 | } | |||||
2999 | ||||||
3000 | Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy, | |||||
3001 | OtherPtr->getName() + "."); | |||||
3002 | unsigned SrcAlign = OtherAlign; | |||||
3003 | Value *DstPtr = &NewAI; | |||||
3004 | unsigned DstAlign = SliceAlign; | |||||
3005 | if (!IsDest) { | |||||
3006 | std::swap(SrcPtr, DstPtr); | |||||
3007 | std::swap(SrcAlign, DstAlign); | |||||
3008 | } | |||||
3009 | ||||||
3010 | Value *Src; | |||||
3011 | if (VecTy && !IsWholeAlloca && !IsDest) { | |||||
3012 | Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load"); | |||||
3013 | Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec"); | |||||
3014 | } else if (IntTy && !IsWholeAlloca && !IsDest) { | |||||
3015 | Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load"); | |||||
3016 | Src = convertValue(DL, IRB, Src, IntTy); | |||||
3017 | uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; | |||||
3018 | Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract"); | |||||
3019 | } else { | |||||
3020 | Src = | |||||
3021 | IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.isVolatile(), "copyload"); | |||||
3022 | } | |||||
3023 | ||||||
3024 | if (VecTy && !IsWholeAlloca && IsDest) { | |||||
3025 | Value *Old = | |||||
3026 | IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload"); | |||||
3027 | Src = insertVector(IRB, Old, Src, BeginIndex, "vec"); | |||||
3028 | } else if (IntTy && !IsWholeAlloca && IsDest) { | |||||
3029 | Value *Old = | |||||
3030 | IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload"); | |||||
3031 | Old = convertValue(DL, IRB, Old, IntTy); | |||||
3032 | uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; | |||||
3033 | Src = insertInteger(DL, IRB, Old, Src, Offset, "insert"); | |||||
3034 | Src = convertValue(DL, IRB, Src, NewAllocaTy); | |||||
3035 | } | |||||
3036 | ||||||
3037 | StoreInst *Store = cast<StoreInst>( | |||||
3038 | IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); | |||||
3039 | (void)Store; | |||||
3040 | DEBUG(dbgs() << " to: " << *Store << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << *Store << "\n"; } } while (0); | |||||
3041 | return !II.isVolatile(); | |||||
3042 | } | |||||
3043 | ||||||
3044 | bool visitIntrinsicInst(IntrinsicInst &II) { | |||||
3045 | assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||((II.getIntrinsicID() == Intrinsic::lifetime_start || II.getIntrinsicID () == Intrinsic::lifetime_end) ? static_cast<void> (0) : __assert_fail ("II.getIntrinsicID() == Intrinsic::lifetime_start || II.getIntrinsicID() == Intrinsic::lifetime_end" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3046, __PRETTY_FUNCTION__)) | |||||
3046 | II.getIntrinsicID() == Intrinsic::lifetime_end)((II.getIntrinsicID() == Intrinsic::lifetime_start || II.getIntrinsicID () == Intrinsic::lifetime_end) ? static_cast<void> (0) : __assert_fail ("II.getIntrinsicID() == Intrinsic::lifetime_start || II.getIntrinsicID() == Intrinsic::lifetime_end" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3046, __PRETTY_FUNCTION__)); | |||||
3047 | DEBUG(dbgs() << " original: " << II << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " original: " << II << "\n"; } } while (0); | |||||
3048 | assert(II.getArgOperand(1) == OldPtr)((II.getArgOperand(1) == OldPtr) ? static_cast<void> (0 ) : __assert_fail ("II.getArgOperand(1) == OldPtr", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3048, __PRETTY_FUNCTION__)); | |||||
3049 | ||||||
3050 | // Record this instruction for deletion. | |||||
3051 | Pass.DeadInsts.insert(&II); | |||||
3052 | ||||||
3053 | ConstantInt *Size = | |||||
3054 | ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()), | |||||
3055 | NewEndOffset - NewBeginOffset); | |||||
3056 | Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); | |||||
3057 | Value *New; | |||||
3058 | if (II.getIntrinsicID() == Intrinsic::lifetime_start) | |||||
3059 | New = IRB.CreateLifetimeStart(Ptr, Size); | |||||
3060 | else | |||||
3061 | New = IRB.CreateLifetimeEnd(Ptr, Size); | |||||
3062 | ||||||
3063 | (void)New; | |||||
3064 | DEBUG(dbgs() << " to: " << *New << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << *New << "\n"; } } while (0); | |||||
3065 | return true; | |||||
3066 | } | |||||
3067 | ||||||
  // Rewrite a PHI node that has the old alloca pointer as one of its incoming
  // values: the matching operands are redirected at the new alloca slice
  // pointer. The PHI itself is kept and queued in PHIUsers for later
  // speculation analysis. Returns true (the use was rewritten).
  bool visitPHINode(PHINode &PN) {
    DEBUG(dbgs() << " original: " << PN << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " original: " << PN << "\n"; } } while (0);
    assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable")((BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable" ) ? static_cast<void> (0) : __assert_fail ("BeginOffset >= NewAllocaBeginOffset && \"PHIs are unsplittable\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3070, __PRETTY_FUNCTION__));
    assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable")((EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable" ) ? static_cast<void> (0) : __assert_fail ("EndOffset <= NewAllocaEndOffset && \"PHIs are unsplittable\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3071, __PRETTY_FUNCTION__));

    // We would like to compute a new pointer in only one place, but have it be
    // as local as possible to the PHI. To do that, we re-use the location of
    // the old pointer, which necessarily must be in the right position to
    // dominate the PHI.
    IRBuilderTy PtrBuilder(IRB);
    if (isa<PHINode>(OldPtr))
      // Can't insert next to another PHI; use the block's first insertion
      // point instead.
      PtrBuilder.SetInsertPoint(OldPtr->getParent()->getFirstInsertionPt());
    else
      PtrBuilder.SetInsertPoint(OldPtr);
    PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());

    Value *NewPtr = getNewAllocaSlicePtr(PtrBuilder, OldPtr->getType());
    // Replace the operands which were using the old pointer.
    std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);

    DEBUG(dbgs() << " to: " << PN << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << PN << "\n"; } } while (0);
    deleteIfTriviallyDead(OldPtr);

    // PHIs can't be promoted on their own, but often can be speculated. We
    // check the speculation outside of the rewriter so that we see the
    // fully-rewritten alloca.
    PHIUsers.insert(&PN);
    return true;
  }
3097 | ||||||
  // Rewrite a select whose true and/or false operand is the old alloca
  // pointer, redirecting those operands at the new alloca slice pointer. The
  // select is kept and queued in SelectUsers for later speculation analysis.
  // Returns true (the use was rewritten).
  bool visitSelectInst(SelectInst &SI) {
    DEBUG(dbgs() << " original: " << SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " original: " << SI << "\n"; } } while (0);
    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&(((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr ) && "Pointer isn't an operand!") ? static_cast<void > (0) : __assert_fail ("(SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) && \"Pointer isn't an operand!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3101, __PRETTY_FUNCTION__))
           "Pointer isn't an operand!")(((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr ) && "Pointer isn't an operand!") ? static_cast<void > (0) : __assert_fail ("(SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) && \"Pointer isn't an operand!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3101, __PRETTY_FUNCTION__));
    assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable")((BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable" ) ? static_cast<void> (0) : __assert_fail ("BeginOffset >= NewAllocaBeginOffset && \"Selects are unsplittable\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3102, __PRETTY_FUNCTION__));
    assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable")((EndOffset <= NewAllocaEndOffset && "Selects are unsplittable" ) ? static_cast<void> (0) : __assert_fail ("EndOffset <= NewAllocaEndOffset && \"Selects are unsplittable\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3103, __PRETTY_FUNCTION__));

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
    // Replace the operands which were using the old pointer. Either or both
    // of the true (operand 1) and false (operand 2) values may match.
    if (SI.getOperand(1) == OldPtr)
      SI.setOperand(1, NewPtr);
    if (SI.getOperand(2) == OldPtr)
      SI.setOperand(2, NewPtr);

    DEBUG(dbgs() << " to: " << SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << SI << "\n"; } } while (0);
    deleteIfTriviallyDead(OldPtr);

    // Selects can't be promoted on their own, but often can be speculated. We
    // check the speculation outside of the rewriter so that we see the
    // fully-rewritten alloca.
    SelectUsers.insert(&SI);
    return true;
  }
3121 | }; | |||||
3122 | } | |||||
3123 | ||||||
3124 | namespace { | |||||
3125 | /// \brief Visitor to rewrite aggregate loads and stores as scalar. | |||||
3126 | /// | |||||
3127 | /// This pass aggressively rewrites all aggregate loads and stores on | |||||
3128 | /// a particular pointer (or any pointer derived from it which we can identify) | |||||
3129 | /// with scalar loads and stores. | |||||
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
  // Befriend the base class so it can delegate to private visit methods.
  friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;

  // Data layout used to reason about the types being split.
  const DataLayout &DL;

  /// Queue of pointer uses to analyze and potentially rewrite.
  SmallVector<Use *, 8> Queue;

  /// Set to prevent us from cycling with phi nodes and loops.
  SmallPtrSet<User *, 8> Visited;

  /// The current pointer use being rewritten. This is used to dig up the used
  /// value (as opposed to the user).
  Use *U;

public:
  AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {}

  /// Rewrite loads and stores through a pointer and all pointers derived from
  /// it. Returns true if any instruction was rewritten.
  bool rewrite(Instruction &I) {
    DEBUG(dbgs() << " Rewriting FCA loads and stores...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " Rewriting FCA loads and stores...\n" ; } } while (0);
    // Worklist-driven traversal: seed with I's uses, then visit each queued
    // use's user; the visit methods below enqueue further derived pointers.
    enqueueUsers(I);
    bool Changed = false;
    while (!Queue.empty()) {
      U = Queue.pop_back_val();
      Changed |= visit(cast<Instruction>(U->getUser()));
    }
    return Changed;
  }

private:
  /// Enqueue all the users of the given instruction for further processing.
  /// This uses a set to de-duplicate users.
  void enqueueUsers(Instruction &I) {
    for (Use &U : I.uses())
      if (Visited.insert(U.getUser()).second)
        Queue.push_back(&U);
  }

  // Conservative default is to not rewrite anything.
  bool visitInstruction(Instruction &I) { return false; }

  /// \brief Generic recursive split emission class.
  template <typename Derived> class OpSplitter {
  protected:
    /// The builder used to form new instructions.
    IRBuilderTy IRB;
    /// The indices which are used with insert- or extractvalue to select the
    /// appropriate value within the aggregate.
    SmallVector<unsigned, 4> Indices;
    /// The indices to a GEP instruction which will move Ptr to the correct slot
    /// within the aggregate.
    SmallVector<Value *, 4> GEPIndices;
    /// The base pointer of the original op, used as a base for GEPing the
    /// split operations.
    Value *Ptr;

    /// Initialize the splitter with an insertion point, Ptr and start with a
    /// single zero GEP index.
    OpSplitter(Instruction *InsertionPoint, Value *Ptr)
        : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {}

  public:
    /// \brief Generic recursive split emission routine.
    ///
    /// This method recursively splits an aggregate op (load or store) into
    /// scalar or vector ops. It splits recursively until it hits a single value
    /// and emits that single value operation via the template argument.
    ///
    /// The logic of this routine relies on GEPs and insertvalue and
    /// extractvalue all operating with the same fundamental index list, merely
    /// formatted differently (GEPs need actual values).
    ///
    /// \param Ty The type being split recursively into smaller ops.
    /// \param Agg The aggregate value being built up or stored, depending on
    /// whether this is splitting a load or a store respectively.
    void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
      // Leaf case: hand the scalar/vector element off to the derived class.
      if (Ty->isSingleValueType())
        return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name);

      // Array case: recurse into each element, pushing the element index onto
      // both the aggregate-index and GEP-index stacks around the recursion.
      if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
        unsigned OldSize = Indices.size();
        (void)OldSize;
        for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
             ++Idx) {
          assert(Indices.size() == OldSize && "Did not return to the old size")((Indices.size() == OldSize && "Did not return to the old size" ) ? static_cast<void> (0) : __assert_fail ("Indices.size() == OldSize && \"Did not return to the old size\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3217, __PRETTY_FUNCTION__));
          Indices.push_back(Idx);
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
          GEPIndices.pop_back();
          Indices.pop_back();
        }
        return;
      }

      // Struct case: same shape as the array case, but each field may have a
      // distinct element type.
      if (StructType *STy = dyn_cast<StructType>(Ty)) {
        unsigned OldSize = Indices.size();
        (void)OldSize;
        for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
             ++Idx) {
          assert(Indices.size() == OldSize && "Did not return to the old size")((Indices.size() == OldSize && "Did not return to the old size" ) ? static_cast<void> (0) : __assert_fail ("Indices.size() == OldSize && \"Did not return to the old size\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3232, __PRETTY_FUNCTION__));
          Indices.push_back(Idx);
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
          GEPIndices.pop_back();
          Indices.pop_back();
        }
        return;
      }

      llvm_unreachable("Only arrays and structs are aggregate loadable types")::llvm::llvm_unreachable_internal("Only arrays and structs are aggregate loadable types" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3242);
    }
  };

  // Splitter that rewrites an aggregate load as one scalar load per leaf.
  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
    LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr)
        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr) {}

    /// Emit a leaf load of a single value. This is called at the leaves of the
    /// recursive emission to actually load values.
    void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
      assert(Ty->isSingleValueType())((Ty->isSingleValueType()) ? static_cast<void> (0) : __assert_fail ("Ty->isSingleValueType()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3253, __PRETTY_FUNCTION__));
      // Load the single value and insert it using the indices.
      Value *GEP =
          IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
      Value *Load = IRB.CreateLoad(GEP, Name + ".load");
      Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
      DEBUG(dbgs() << " to: " << *Load << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << *Load << "\n"; } } while (0);
    }
  };

  // Split a simple aggregate load into per-leaf scalar loads, replace all
  // uses with the rebuilt aggregate value, and erase the original load.
  // Returns true if the load was rewritten.
  bool visitLoadInst(LoadInst &LI) {
    assert(LI.getPointerOperand() == *U)((LI.getPointerOperand() == *U) ? static_cast<void> (0) : __assert_fail ("LI.getPointerOperand() == *U", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3264, __PRETTY_FUNCTION__));
    // Only simple (non-volatile, non-atomic) loads of aggregates are split.
    if (!LI.isSimple() || LI.getType()->isSingleValueType())
      return false;

    // We have an aggregate being loaded, split it apart.
    DEBUG(dbgs() << " original: " << LI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " original: " << LI << "\n"; } } while (0);
    LoadOpSplitter Splitter(&LI, *U);
    Value *V = UndefValue::get(LI.getType());
    Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
    LI.replaceAllUsesWith(V);
    LI.eraseFromParent();
    return true;
  }

  // Splitter that rewrites an aggregate store as one scalar store per leaf.
  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
    StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr)
        : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr) {}

    /// Emit a leaf store of a single value. This is called at the leaves of the
    /// recursive emission to actually produce stores.
    void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
      assert(Ty->isSingleValueType())((Ty->isSingleValueType()) ? static_cast<void> (0) : __assert_fail ("Ty->isSingleValueType()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3285, __PRETTY_FUNCTION__));
      // Extract the single value and store it using the indices.
      Value *Store = IRB.CreateStore(
          IRB.CreateExtractValue(Agg, Indices, Name + ".extract"),
          IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"));
      (void)Store;
      DEBUG(dbgs() << " to: " << *Store << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " to: " << *Store << "\n"; } } while (0);
    }
  };

  // Split a simple aggregate store into per-leaf scalar stores and erase the
  // original store. Returns true if the store was rewritten.
  bool visitStoreInst(StoreInst &SI) {
    // Only rewrite when this use is the pointer operand (not the stored
    // value) and the store is simple.
    if (!SI.isSimple() || SI.getPointerOperand() != *U)
      return false;
    Value *V = SI.getValueOperand();
    if (V->getType()->isSingleValueType())
      return false;

    // We have an aggregate being stored, split it apart.
    DEBUG(dbgs() << " original: " << SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " original: " << SI << "\n"; } } while (0);
    StoreOpSplitter Splitter(&SI, *U);
    Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
    SI.eraseFromParent();
    return true;
  }

  // Pointer-propagating instructions: don't rewrite them, but follow their
  // users so loads/stores through derived pointers are found.
  bool visitBitCastInst(BitCastInst &BC) {
    enqueueUsers(BC);
    return false;
  }

  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    enqueueUsers(GEPI);
    return false;
  }

  bool visitPHINode(PHINode &PN) {
    enqueueUsers(PN);
    return false;
  }

  bool visitSelectInst(SelectInst &SI) {
    enqueueUsers(SI);
    return false;
  }
};
3330 | } | |||||
3331 | ||||||
3332 | /// \brief Strip aggregate type wrapping. | |||||
3333 | /// | |||||
3334 | /// This removes no-op aggregate types wrapping an underlying type. It will | |||||
3335 | /// strip as many layers of types as it can without changing either the type | |||||
3336 | /// size or the allocated size. | |||||
3337 | static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) { | |||||
3338 | if (Ty->isSingleValueType()) | |||||
3339 | return Ty; | |||||
3340 | ||||||
3341 | uint64_t AllocSize = DL.getTypeAllocSize(Ty); | |||||
3342 | uint64_t TypeSize = DL.getTypeSizeInBits(Ty); | |||||
3343 | ||||||
3344 | Type *InnerTy; | |||||
3345 | if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { | |||||
3346 | InnerTy = ArrTy->getElementType(); | |||||
3347 | } else if (StructType *STy = dyn_cast<StructType>(Ty)) { | |||||
3348 | const StructLayout *SL = DL.getStructLayout(STy); | |||||
3349 | unsigned Index = SL->getElementContainingOffset(0); | |||||
3350 | InnerTy = STy->getElementType(Index); | |||||
3351 | } else { | |||||
3352 | return Ty; | |||||
3353 | } | |||||
3354 | ||||||
3355 | if (AllocSize > DL.getTypeAllocSize(InnerTy) || | |||||
3356 | TypeSize > DL.getTypeSizeInBits(InnerTy)) | |||||
3357 | return Ty; | |||||
3358 | ||||||
3359 | return stripAggregateTypeWrapping(DL, InnerTy); | |||||
3360 | } | |||||
3361 | ||||||
3362 | /// \brief Try to find a partition of the aggregate type passed in for a given | |||||
3363 | /// offset and size. | |||||
3364 | /// | |||||
3365 | /// This recurses through the aggregate type and tries to compute a subtype | |||||
3366 | /// based on the offset and size. When the offset and size span a sub-section | |||||
3367 | /// of an array, it will even compute a new array type for that sub-section, | |||||
3368 | /// and the same for structs. | |||||
3369 | /// | |||||
3370 | /// Note that this routine is very strict and tries to find a partition of the | |||||
3371 | /// type which produces the *exact* right offset and size. It is not forgiving | |||||
/// when the size or offset causes either end of the type-based partition to be off.
3373 | /// Also, this is a best-effort routine. It is reasonable to give up and not | |||||
3374 | /// return a type if necessary. | |||||
3375 | static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, | |||||
3376 | uint64_t Size) { | |||||
3377 | if (Offset == 0 && DL.getTypeAllocSize(Ty) == Size) | |||||
3378 | return stripAggregateTypeWrapping(DL, Ty); | |||||
3379 | if (Offset > DL.getTypeAllocSize(Ty) || | |||||
3380 | (DL.getTypeAllocSize(Ty) - Offset) < Size) | |||||
3381 | return nullptr; | |||||
3382 | ||||||
3383 | if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) { | |||||
3384 | // We can't partition pointers... | |||||
3385 | if (SeqTy->isPointerTy()) | |||||
3386 | return nullptr; | |||||
3387 | ||||||
3388 | Type *ElementTy = SeqTy->getElementType(); | |||||
3389 | uint64_t ElementSize = DL.getTypeAllocSize(ElementTy); | |||||
3390 | uint64_t NumSkippedElements = Offset / ElementSize; | |||||
3391 | if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) { | |||||
3392 | if (NumSkippedElements >= ArrTy->getNumElements()) | |||||
3393 | return nullptr; | |||||
3394 | } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) { | |||||
3395 | if (NumSkippedElements >= VecTy->getNumElements()) | |||||
3396 | return nullptr; | |||||
3397 | } | |||||
3398 | Offset -= NumSkippedElements * ElementSize; | |||||
3399 | ||||||
3400 | // First check if we need to recurse. | |||||
3401 | if (Offset > 0 || Size < ElementSize) { | |||||
3402 | // Bail if the partition ends in a different array element. | |||||
3403 | if ((Offset + Size) > ElementSize) | |||||
3404 | return nullptr; | |||||
3405 | // Recurse through the element type trying to peel off offset bytes. | |||||
3406 | return getTypePartition(DL, ElementTy, Offset, Size); | |||||
3407 | } | |||||
3408 | assert(Offset == 0)((Offset == 0) ? static_cast<void> (0) : __assert_fail ( "Offset == 0", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3408, __PRETTY_FUNCTION__)); | |||||
3409 | ||||||
3410 | if (Size == ElementSize) | |||||
3411 | return stripAggregateTypeWrapping(DL, ElementTy); | |||||
3412 | assert(Size > ElementSize)((Size > ElementSize) ? static_cast<void> (0) : __assert_fail ("Size > ElementSize", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3412, __PRETTY_FUNCTION__)); | |||||
3413 | uint64_t NumElements = Size / ElementSize; | |||||
3414 | if (NumElements * ElementSize != Size) | |||||
3415 | return nullptr; | |||||
3416 | return ArrayType::get(ElementTy, NumElements); | |||||
3417 | } | |||||
3418 | ||||||
3419 | StructType *STy = dyn_cast<StructType>(Ty); | |||||
3420 | if (!STy) | |||||
3421 | return nullptr; | |||||
3422 | ||||||
3423 | const StructLayout *SL = DL.getStructLayout(STy); | |||||
3424 | if (Offset >= SL->getSizeInBytes()) | |||||
3425 | return nullptr; | |||||
3426 | uint64_t EndOffset = Offset + Size; | |||||
3427 | if (EndOffset > SL->getSizeInBytes()) | |||||
3428 | return nullptr; | |||||
3429 | ||||||
3430 | unsigned Index = SL->getElementContainingOffset(Offset); | |||||
3431 | Offset -= SL->getElementOffset(Index); | |||||
3432 | ||||||
3433 | Type *ElementTy = STy->getElementType(Index); | |||||
3434 | uint64_t ElementSize = DL.getTypeAllocSize(ElementTy); | |||||
3435 | if (Offset >= ElementSize) | |||||
3436 | return nullptr; // The offset points into alignment padding. | |||||
3437 | ||||||
3438 | // See if any partition must be contained by the element. | |||||
3439 | if (Offset > 0 || Size < ElementSize) { | |||||
3440 | if ((Offset + Size) > ElementSize) | |||||
3441 | return nullptr; | |||||
3442 | return getTypePartition(DL, ElementTy, Offset, Size); | |||||
3443 | } | |||||
3444 | assert(Offset == 0)((Offset == 0) ? static_cast<void> (0) : __assert_fail ( "Offset == 0", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3444, __PRETTY_FUNCTION__)); | |||||
3445 | ||||||
3446 | if (Size == ElementSize) | |||||
3447 | return stripAggregateTypeWrapping(DL, ElementTy); | |||||
3448 | ||||||
3449 | StructType::element_iterator EI = STy->element_begin() + Index, | |||||
3450 | EE = STy->element_end(); | |||||
3451 | if (EndOffset < SL->getSizeInBytes()) { | |||||
3452 | unsigned EndIndex = SL->getElementContainingOffset(EndOffset); | |||||
3453 | if (Index == EndIndex) | |||||
3454 | return nullptr; // Within a single element and its padding. | |||||
3455 | ||||||
3456 | // Don't try to form "natural" types if the elements don't line up with the | |||||
3457 | // expected size. | |||||
3458 | // FIXME: We could potentially recurse down through the last element in the | |||||
3459 | // sub-struct to find a natural end point. | |||||
3460 | if (SL->getElementOffset(EndIndex) != EndOffset) | |||||
3461 | return nullptr; | |||||
3462 | ||||||
3463 | assert(Index < EndIndex)((Index < EndIndex) ? static_cast<void> (0) : __assert_fail ("Index < EndIndex", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3463, __PRETTY_FUNCTION__)); | |||||
3464 | EE = STy->element_begin() + EndIndex; | |||||
3465 | } | |||||
3466 | ||||||
3467 | // Try to build up a sub-structure. | |||||
3468 | StructType *SubTy = | |||||
3469 | StructType::get(STy->getContext(), makeArrayRef(EI, EE), STy->isPacked()); | |||||
3470 | const StructLayout *SubSL = DL.getStructLayout(SubTy); | |||||
3471 | if (Size != SubSL->getSizeInBytes()) | |||||
3472 | return nullptr; // The sub-struct doesn't have quite the size needed. | |||||
3473 | ||||||
3474 | return SubTy; | |||||
3475 | } | |||||
3476 | ||||||
3477 | /// \brief Pre-split loads and stores to simplify rewriting. | |||||
3478 | /// | |||||
3479 | /// We want to break up the splittable load+store pairs as much as | |||||
3480 | /// possible. This is important to do as a preprocessing step, as once we | |||||
3481 | /// start rewriting the accesses to partitions of the alloca we lose the | |||||
3482 | /// necessary information to correctly split apart paired loads and stores | |||||
3483 | /// which both point into this alloca. The case to consider is something like | |||||
3484 | /// the following: | |||||
3485 | /// | |||||
3486 | /// %a = alloca [12 x i8] | |||||
3487 | /// %gep1 = getelementptr [12 x i8]* %a, i32 0, i32 0 | |||||
3488 | /// %gep2 = getelementptr [12 x i8]* %a, i32 0, i32 4 | |||||
3489 | /// %gep3 = getelementptr [12 x i8]* %a, i32 0, i32 8 | |||||
3490 | /// %iptr1 = bitcast i8* %gep1 to i64* | |||||
3491 | /// %iptr2 = bitcast i8* %gep2 to i64* | |||||
3492 | /// %fptr1 = bitcast i8* %gep1 to float* | |||||
3493 | /// %fptr2 = bitcast i8* %gep2 to float* | |||||
3494 | /// %fptr3 = bitcast i8* %gep3 to float* | |||||
3495 | /// store float 0.0, float* %fptr1 | |||||
3496 | /// store float 1.0, float* %fptr2 | |||||
3497 | /// %v = load i64* %iptr1 | |||||
3498 | /// store i64 %v, i64* %iptr2 | |||||
3499 | /// %f1 = load float* %fptr2 | |||||
3500 | /// %f2 = load float* %fptr3 | |||||
3501 | /// | |||||
3502 | /// Here we want to form 3 partitions of the alloca, each 4 bytes large, and | |||||
3503 | /// promote everything so we recover the 2 SSA values that should have been | |||||
3504 | /// there all along. | |||||
3505 | /// | |||||
3506 | /// \returns true if any changes are made. | |||||
3507 | bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { | |||||
3508 | DEBUG(dbgs() << "Pre-splitting loads and stores\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "Pre-splitting loads and stores\n" ; } } while (0); | |||||
3509 | ||||||
3510 | // Track the loads and stores which are candidates for pre-splitting here, in | |||||
3511 | // the order they first appear during the partition scan. These give stable | |||||
3512 | // iteration order and a basis for tracking which loads and stores we | |||||
3513 | // actually split. | |||||
3514 | SmallVector<LoadInst *, 4> Loads; | |||||
3515 | SmallVector<StoreInst *, 4> Stores; | |||||
3516 | ||||||
3517 | // We need to accumulate the splits required of each load or store where we | |||||
3518 | // can find them via a direct lookup. This is important to cross-check loads | |||||
3519 | // and stores against each other. We also track the slice so that we can kill | |||||
3520 | // all the slices that end up split. | |||||
3521 | struct SplitOffsets { | |||||
3522 | Slice *S; | |||||
3523 | std::vector<uint64_t> Splits; | |||||
3524 | }; | |||||
3525 | SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap; | |||||
3526 | ||||||
3527 | // Track loads out of this alloca which cannot, for any reason, be pre-split. | |||||
3528 | // This is important as we also cannot pre-split stores of those loads! | |||||
3529 | // FIXME: This is all pretty gross. It means that we can be more aggressive | |||||
3530 | // in pre-splitting when the load feeding the store happens to come from | |||||
3531 | // a separate alloca. Put another way, the effectiveness of SROA would be | |||||
3532 | // decreased by a frontend which just concatenated all of its local allocas | |||||
3533 | // into one big flat alloca. But defeating such patterns is exactly the job | |||||
3534 | // SROA is tasked with! Sadly, to not have this discrepancy we would have | |||||
3535 | // change store pre-splitting to actually force pre-splitting of the load | |||||
3536 | // that feeds it *and all stores*. That makes pre-splitting much harder, but | |||||
3537 | // maybe it would make it more principled? | |||||
3538 | SmallPtrSet<LoadInst *, 8> UnsplittableLoads; | |||||
3539 | ||||||
3540 | DEBUG(dbgs() << " Searching for candidate loads and stores\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " Searching for candidate loads and stores\n" ; } } while (0); | |||||
3541 | for (auto &P : AS.partitions()) { | |||||
3542 | for (Slice &S : P) { | |||||
3543 | Instruction *I = cast<Instruction>(S.getUse()->getUser()); | |||||
3544 | if (!S.isSplittable() ||S.endOffset() <= P.endOffset()) { | |||||
3545 | // If this was a load we have to track that it can't participate in any | |||||
3546 | // pre-splitting! | |||||
3547 | if (auto *LI = dyn_cast<LoadInst>(I)) | |||||
3548 | UnsplittableLoads.insert(LI); | |||||
3549 | continue; | |||||
3550 | } | |||||
3551 | assert(P.endOffset() > S.beginOffset() &&((P.endOffset() > S.beginOffset() && "Empty or backwards partition!" ) ? static_cast<void> (0) : __assert_fail ("P.endOffset() > S.beginOffset() && \"Empty or backwards partition!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3552, __PRETTY_FUNCTION__)) | |||||
3552 | "Empty or backwards partition!")((P.endOffset() > S.beginOffset() && "Empty or backwards partition!" ) ? static_cast<void> (0) : __assert_fail ("P.endOffset() > S.beginOffset() && \"Empty or backwards partition!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3552, __PRETTY_FUNCTION__)); | |||||
3553 | ||||||
3554 | // Determine if this is a pre-splittable slice. | |||||
3555 | if (auto *LI = dyn_cast<LoadInst>(I)) { | |||||
3556 | assert(!LI->isVolatile() && "Cannot split volatile loads!")((!LI->isVolatile() && "Cannot split volatile loads!" ) ? static_cast<void> (0) : __assert_fail ("!LI->isVolatile() && \"Cannot split volatile loads!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3556, __PRETTY_FUNCTION__)); | |||||
3557 | ||||||
3558 | // The load must be used exclusively to store into other pointers for | |||||
3559 | // us to be able to arbitrarily pre-split it. The stores must also be | |||||
3560 | // simple to avoid changing semantics. | |||||
3561 | auto IsLoadSimplyStored = [](LoadInst *LI) { | |||||
3562 | for (User *LU : LI->users()) { | |||||
3563 | auto *SI = dyn_cast<StoreInst>(LU); | |||||
3564 | if (!SI || !SI->isSimple()) | |||||
3565 | return false; | |||||
3566 | } | |||||
3567 | return true; | |||||
3568 | }; | |||||
3569 | if (!IsLoadSimplyStored(LI)) { | |||||
3570 | UnsplittableLoads.insert(LI); | |||||
3571 | continue; | |||||
3572 | } | |||||
3573 | ||||||
3574 | Loads.push_back(LI); | |||||
3575 | } else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser())) { | |||||
3576 | if (!SI || | |||||
3577 | S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex())) | |||||
3578 | continue; | |||||
3579 | auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand()); | |||||
3580 | if (!StoredLoad || !StoredLoad->isSimple()) | |||||
3581 | continue; | |||||
3582 | assert(!SI->isVolatile() && "Cannot split volatile stores!")((!SI->isVolatile() && "Cannot split volatile stores!" ) ? static_cast<void> (0) : __assert_fail ("!SI->isVolatile() && \"Cannot split volatile stores!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3582, __PRETTY_FUNCTION__)); | |||||
3583 | ||||||
3584 | Stores.push_back(SI); | |||||
3585 | } else { | |||||
3586 | // Other uses cannot be pre-split. | |||||
3587 | continue; | |||||
3588 | } | |||||
3589 | ||||||
3590 | // Record the initial split. | |||||
3591 | DEBUG(dbgs() << " Candidate: " << *I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " Candidate: " << *I << "\n"; } } while (0); | |||||
3592 | auto &Offsets = SplitOffsetsMap[I]; | |||||
3593 | assert(Offsets.Splits.empty() &&((Offsets.Splits.empty() && "Should not have splits the first time we see an instruction!" ) ? static_cast<void> (0) : __assert_fail ("Offsets.Splits.empty() && \"Should not have splits the first time we see an instruction!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3594, __PRETTY_FUNCTION__)) | |||||
3594 | "Should not have splits the first time we see an instruction!")((Offsets.Splits.empty() && "Should not have splits the first time we see an instruction!" ) ? static_cast<void> (0) : __assert_fail ("Offsets.Splits.empty() && \"Should not have splits the first time we see an instruction!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3594, __PRETTY_FUNCTION__)); | |||||
3595 | Offsets.S = &S; | |||||
3596 | Offsets.Splits.push_back(P.endOffset() - S.beginOffset()); | |||||
3597 | } | |||||
3598 | ||||||
3599 | // Now scan the already split slices, and add a split for any of them which | |||||
3600 | // we're going to pre-split. | |||||
3601 | for (Slice *S : P.splitSliceTails()) { | |||||
3602 | auto SplitOffsetsMapI = | |||||
3603 | SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser())); | |||||
3604 | if (SplitOffsetsMapI == SplitOffsetsMap.end()) | |||||
3605 | continue; | |||||
3606 | auto &Offsets = SplitOffsetsMapI->second; | |||||
3607 | ||||||
3608 | assert(Offsets.S == S && "Found a mismatched slice!")((Offsets.S == S && "Found a mismatched slice!") ? static_cast <void> (0) : __assert_fail ("Offsets.S == S && \"Found a mismatched slice!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3608, __PRETTY_FUNCTION__)); | |||||
3609 | assert(!Offsets.Splits.empty() &&((!Offsets.Splits.empty() && "Cannot have an empty set of splits on the second partition!" ) ? static_cast<void> (0) : __assert_fail ("!Offsets.Splits.empty() && \"Cannot have an empty set of splits on the second partition!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3610, __PRETTY_FUNCTION__)) | |||||
3610 | "Cannot have an empty set of splits on the second partition!")((!Offsets.Splits.empty() && "Cannot have an empty set of splits on the second partition!" ) ? static_cast<void> (0) : __assert_fail ("!Offsets.Splits.empty() && \"Cannot have an empty set of splits on the second partition!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3610, __PRETTY_FUNCTION__)); | |||||
3611 | assert(Offsets.Splits.back() ==((Offsets.Splits.back() == P.beginOffset() - Offsets.S->beginOffset () && "Previous split does not end where this one begins!" ) ? static_cast<void> (0) : __assert_fail ("Offsets.Splits.back() == P.beginOffset() - Offsets.S->beginOffset() && \"Previous split does not end where this one begins!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3613, __PRETTY_FUNCTION__)) | |||||
3612 | P.beginOffset() - Offsets.S->beginOffset() &&((Offsets.Splits.back() == P.beginOffset() - Offsets.S->beginOffset () && "Previous split does not end where this one begins!" ) ? static_cast<void> (0) : __assert_fail ("Offsets.Splits.back() == P.beginOffset() - Offsets.S->beginOffset() && \"Previous split does not end where this one begins!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3613, __PRETTY_FUNCTION__)) | |||||
3613 | "Previous split does not end where this one begins!")((Offsets.Splits.back() == P.beginOffset() - Offsets.S->beginOffset () && "Previous split does not end where this one begins!" ) ? static_cast<void> (0) : __assert_fail ("Offsets.Splits.back() == P.beginOffset() - Offsets.S->beginOffset() && \"Previous split does not end where this one begins!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3613, __PRETTY_FUNCTION__)); | |||||
3614 | ||||||
3615 | // Record each split. The last partition's end isn't needed as the size | |||||
3616 | // of the slice dictates that. | |||||
3617 | if (S->endOffset() > P.endOffset()) | |||||
3618 | Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset()); | |||||
3619 | } | |||||
3620 | } | |||||
3621 | ||||||
3622 | // We may have split loads where some of their stores are split stores. For | |||||
3623 | // such loads and stores, we can only pre-split them if their splits exactly | |||||
3624 | // match relative to their starting offset. We have to verify this prior to | |||||
3625 | // any rewriting. | |||||
3626 | Stores.erase( | |||||
3627 | std::remove_if(Stores.begin(), Stores.end(), | |||||
3628 | [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) { | |||||
3629 | // Lookup the load we are storing in our map of split | |||||
3630 | // offsets. | |||||
3631 | auto *LI = cast<LoadInst>(SI->getValueOperand()); | |||||
3632 | // If it was completely unsplittable, then we're done, | |||||
3633 | // and this store can't be pre-split. | |||||
3634 | if (UnsplittableLoads.count(LI)) | |||||
3635 | return true; | |||||
3636 | ||||||
3637 | auto LoadOffsetsI = SplitOffsetsMap.find(LI); | |||||
3638 | if (LoadOffsetsI == SplitOffsetsMap.end()) | |||||
3639 | return false; // Unrelated loads are definitely safe. | |||||
3640 | auto &LoadOffsets = LoadOffsetsI->second; | |||||
3641 | ||||||
3642 | // Now lookup the store's offsets. | |||||
3643 | auto &StoreOffsets = SplitOffsetsMap[SI]; | |||||
3644 | ||||||
3645 | // If the relative offsets of each split in the load and | |||||
3646 | // store match exactly, then we can split them and we | |||||
3647 | // don't need to remove them here. | |||||
3648 | if (LoadOffsets.Splits == StoreOffsets.Splits) | |||||
3649 | return false; | |||||
3650 | ||||||
3651 | DEBUG(dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " Mismatched splits for load and store:\n" << " " << *LI << "\n" << " " << *SI << "\n"; } } while (0) | |||||
3652 | << " Mismatched splits for load and store:\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " Mismatched splits for load and store:\n" << " " << *LI << "\n" << " " << *SI << "\n"; } } while (0) | |||||
3653 | << " " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " Mismatched splits for load and store:\n" << " " << *LI << "\n" << " " << *SI << "\n"; } } while (0) | |||||
3654 | << " " << *SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " Mismatched splits for load and store:\n" << " " << *LI << "\n" << " " << *SI << "\n"; } } while (0); | |||||
3655 | ||||||
3656 | // We've found a store and load that we need to split | |||||
3657 | // with mismatched relative splits. Just give up on them | |||||
3658 | // and remove both instructions from our list of | |||||
3659 | // candidates. | |||||
3660 | UnsplittableLoads.insert(LI); | |||||
3661 | return true; | |||||
3662 | }), | |||||
3663 | Stores.end()); | |||||
3664 | // Now we have to go *back* through all the stores, because a later store may | |||||
3665 | // have caused an earlier store's load to become unsplittable and if it is | |||||
3666 | // unsplittable for the later store, then we can't rely on it being split in | |||||
3667 | // the earlier store either. | |||||
3668 | Stores.erase(std::remove_if(Stores.begin(), Stores.end(), | |||||
3669 | [&UnsplittableLoads](StoreInst *SI) { | |||||
3670 | auto *LI = | |||||
3671 | cast<LoadInst>(SI->getValueOperand()); | |||||
3672 | return UnsplittableLoads.count(LI); | |||||
3673 | }), | |||||
3674 | Stores.end()); | |||||
3675 | // Once we've established all the loads that can't be split for some reason, | |||||
3676 | // filter any that made it into our list out. | |||||
3677 | Loads.erase(std::remove_if(Loads.begin(), Loads.end(), | |||||
3678 | [&UnsplittableLoads](LoadInst *LI) { | |||||
3679 | return UnsplittableLoads.count(LI); | |||||
3680 | }), | |||||
3681 | Loads.end()); | |||||
3682 | ||||||
3683 | ||||||
3684 | // If no loads or stores are left, there is no pre-splitting to be done for | |||||
3685 | // this alloca. | |||||
3686 | if (Loads.empty() && Stores.empty()) | |||||
3687 | return false; | |||||
3688 | ||||||
3689 | // From here on, we can't fail and will be building new accesses, so rig up | |||||
3690 | // an IR builder. | |||||
3691 | IRBuilderTy IRB(&AI); | |||||
3692 | ||||||
3693 | // Collect the new slices which we will merge into the alloca slices. | |||||
3694 | SmallVector<Slice, 4> NewSlices; | |||||
3695 | ||||||
3696 | // Track any allocas we end up splitting loads and stores for so we iterate | |||||
3697 | // on them. | |||||
3698 | SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas; | |||||
3699 | ||||||
3700 | // At this point, we have collected all of the loads and stores we can | |||||
3701 | // pre-split, and the specific splits needed for them. We actually do the | |||||
3702 | // splitting in a specific order in order to handle when one of the loads in | |||||
3703 | // the value operand to one of the stores. | |||||
3704 | // | |||||
3705 | // First, we rewrite all of the split loads, and just accumulate each split | |||||
3706 | // load in a parallel structure. We also build the slices for them and append | |||||
3707 | // them to the alloca slices. | |||||
3708 | SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap; | |||||
3709 | std::vector<LoadInst *> SplitLoads; | |||||
3710 | const DataLayout &DL = AI.getModule()->getDataLayout(); | |||||
3711 | for (LoadInst *LI : Loads) { | |||||
3712 | SplitLoads.clear(); | |||||
3713 | ||||||
3714 | IntegerType *Ty = cast<IntegerType>(LI->getType()); | |||||
3715 | uint64_t LoadSize = Ty->getBitWidth() / 8; | |||||
3716 | assert(LoadSize > 0 && "Cannot have a zero-sized integer load!")((LoadSize > 0 && "Cannot have a zero-sized integer load!" ) ? static_cast<void> (0) : __assert_fail ("LoadSize > 0 && \"Cannot have a zero-sized integer load!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3716, __PRETTY_FUNCTION__)); | |||||
3717 | ||||||
3718 | auto &Offsets = SplitOffsetsMap[LI]; | |||||
3719 | assert(LoadSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&((LoadSize == Offsets.S->endOffset() - Offsets.S->beginOffset () && "Slice size should always match load size exactly!" ) ? static_cast<void> (0) : __assert_fail ("LoadSize == Offsets.S->endOffset() - Offsets.S->beginOffset() && \"Slice size should always match load size exactly!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3720, __PRETTY_FUNCTION__)) | |||||
3720 | "Slice size should always match load size exactly!")((LoadSize == Offsets.S->endOffset() - Offsets.S->beginOffset () && "Slice size should always match load size exactly!" ) ? static_cast<void> (0) : __assert_fail ("LoadSize == Offsets.S->endOffset() - Offsets.S->beginOffset() && \"Slice size should always match load size exactly!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3720, __PRETTY_FUNCTION__)); | |||||
3721 | uint64_t BaseOffset = Offsets.S->beginOffset(); | |||||
3722 | assert(BaseOffset + LoadSize > BaseOffset &&((BaseOffset + LoadSize > BaseOffset && "Cannot represent alloca access size using 64-bit integers!" ) ? static_cast<void> (0) : __assert_fail ("BaseOffset + LoadSize > BaseOffset && \"Cannot represent alloca access size using 64-bit integers!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3723, __PRETTY_FUNCTION__)) | |||||
3723 | "Cannot represent alloca access size using 64-bit integers!")((BaseOffset + LoadSize > BaseOffset && "Cannot represent alloca access size using 64-bit integers!" ) ? static_cast<void> (0) : __assert_fail ("BaseOffset + LoadSize > BaseOffset && \"Cannot represent alloca access size using 64-bit integers!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3723, __PRETTY_FUNCTION__)); | |||||
3724 | ||||||
3725 | Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand()); | |||||
3726 | IRB.SetInsertPoint(BasicBlock::iterator(LI)); | |||||
3727 | ||||||
3728 | DEBUG(dbgs() << " Splitting load: " << *LI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " Splitting load: " << *LI << "\n"; } } while (0); | |||||
3729 | ||||||
3730 | uint64_t PartOffset = 0, PartSize = Offsets.Splits.front(); | |||||
3731 | int Idx = 0, Size = Offsets.Splits.size(); | |||||
3732 | for (;;) { | |||||
3733 | auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); | |||||
3734 | auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace()); | |||||
3735 | LoadInst *PLoad = IRB.CreateAlignedLoad( | |||||
3736 | getAdjustedPtr(IRB, DL, BasePtr, | |||||
3737 | APInt(DL.getPointerSizeInBits(), PartOffset), | |||||
3738 | PartPtrTy, BasePtr->getName() + "."), | |||||
3739 | getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, | |||||
3740 | LI->getName()); | |||||
3741 | ||||||
3742 | // Append this load onto the list of split loads so we can find it later | |||||
3743 | // to rewrite the stores. | |||||
3744 | SplitLoads.push_back(PLoad); | |||||
3745 | ||||||
3746 | // Now build a new slice for the alloca. | |||||
3747 | NewSlices.push_back( | |||||
3748 | Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, | |||||
3749 | &PLoad->getOperandUse(PLoad->getPointerOperandIndex()), | |||||
3750 | /*IsSplittable*/ false)); | |||||
3751 | DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " new slice [" << NewSlices .back().beginOffset() << ", " << NewSlices.back() .endOffset() << "): " << *PLoad << "\n"; } } while (0) | |||||
3752 | << ", " << NewSlices.back().endOffset() << "): " << *PLoaddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " new slice [" << NewSlices .back().beginOffset() << ", " << NewSlices.back() .endOffset() << "): " << *PLoad << "\n"; } } while (0) | |||||
3753 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " new slice [" << NewSlices .back().beginOffset() << ", " << NewSlices.back() .endOffset() << "): " << *PLoad << "\n"; } } while (0); | |||||
3754 | ||||||
3755 | // See if we've handled all the splits. | |||||
3756 | if (Idx >= Size) | |||||
3757 | break; | |||||
3758 | ||||||
3759 | // Setup the next partition. | |||||
3760 | PartOffset = Offsets.Splits[Idx]; | |||||
3761 | ++Idx; | |||||
3762 | PartSize = (Idx < Size ? Offsets.Splits[Idx] : LoadSize) - PartOffset; | |||||
3763 | } | |||||
3764 | ||||||
3765 | // Now that we have the split loads, do the slow walk over all uses of the | |||||
3766 | // load and rewrite them as split stores, or save the split loads to use | |||||
3767 | // below if the store is going to be split there anyways. | |||||
3768 | bool DeferredStores = false; | |||||
3769 | for (User *LU : LI->users()) { | |||||
3770 | StoreInst *SI = cast<StoreInst>(LU); | |||||
3771 | if (!Stores.empty() && SplitOffsetsMap.count(SI)) { | |||||
3772 | DeferredStores = true; | |||||
3773 | DEBUG(dbgs() << " Deferred splitting of store: " << *SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " Deferred splitting of store: " << *SI << "\n"; } } while (0); | |||||
3774 | continue; | |||||
3775 | } | |||||
3776 | ||||||
3777 | Value *StoreBasePtr = SI->getPointerOperand(); | |||||
3778 | IRB.SetInsertPoint(BasicBlock::iterator(SI)); | |||||
3779 | ||||||
3780 | DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " Splitting store of load: " << *SI << "\n"; } } while (0); | |||||
3781 | ||||||
3782 | for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) { | |||||
3783 | LoadInst *PLoad = SplitLoads[Idx]; | |||||
3784 | uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1]; | |||||
3785 | auto *PartPtrTy = | |||||
3786 | PLoad->getType()->getPointerTo(SI->getPointerAddressSpace()); | |||||
3787 | ||||||
3788 | StoreInst *PStore = IRB.CreateAlignedStore( | |||||
3789 | PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, | |||||
3790 | APInt(DL.getPointerSizeInBits(), PartOffset), | |||||
3791 | PartPtrTy, StoreBasePtr->getName() + "."), | |||||
3792 | getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); | |||||
3793 | (void)PStore; | |||||
3794 | DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " +" << PartOffset << ":" << *PStore << "\n"; } } while (0); | |||||
3795 | } | |||||
3796 | ||||||
3797 | // We want to immediately iterate on any allocas impacted by splitting | |||||
3798 | // this store, and we have to track any promotable alloca (indicated by | |||||
3799 | // a direct store) as needing to be resplit because it is no longer | |||||
3800 | // promotable. | |||||
3801 | if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) { | |||||
3802 | ResplitPromotableAllocas.insert(OtherAI); | |||||
3803 | Worklist.insert(OtherAI); | |||||
3804 | } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>( | |||||
3805 | StoreBasePtr->stripInBoundsOffsets())) { | |||||
3806 | Worklist.insert(OtherAI); | |||||
3807 | } | |||||
3808 | ||||||
3809 | // Mark the original store as dead. | |||||
3810 | DeadInsts.insert(SI); | |||||
3811 | } | |||||
3812 | ||||||
3813 | // Save the split loads if there are deferred stores among the users. | |||||
3814 | if (DeferredStores) | |||||
3815 | SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads))); | |||||
3816 | ||||||
3817 | // Mark the original load as dead and kill the original slice. | |||||
3818 | DeadInsts.insert(LI); | |||||
3819 | Offsets.S->kill(); | |||||
3820 | } | |||||
3821 | ||||||
3822 | // Second, we rewrite all of the split stores. At this point, we know that | |||||
3823 | // all loads from this alloca have been split already. For stores of such | |||||
3824 | // loads, we can simply look up the pre-existing split loads. For stores of | |||||
3825 | // other loads, we split those loads first and then write split stores of | |||||
3826 | // them. | |||||
3827 | for (StoreInst *SI : Stores) { | |||||
3828 | auto *LI = cast<LoadInst>(SI->getValueOperand()); | |||||
3829 | IntegerType *Ty = cast<IntegerType>(LI->getType()); | |||||
3830 | uint64_t StoreSize = Ty->getBitWidth() / 8; | |||||
3831 | assert(StoreSize > 0 && "Cannot have a zero-sized integer store!")((StoreSize > 0 && "Cannot have a zero-sized integer store!" ) ? static_cast<void> (0) : __assert_fail ("StoreSize > 0 && \"Cannot have a zero-sized integer store!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3831, __PRETTY_FUNCTION__)); | |||||
3832 | ||||||
3833 | auto &Offsets = SplitOffsetsMap[SI]; | |||||
3834 | assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&((StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset () && "Slice size should always match load size exactly!" ) ? static_cast<void> (0) : __assert_fail ("StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() && \"Slice size should always match load size exactly!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3835, __PRETTY_FUNCTION__)) | |||||
3835 | "Slice size should always match load size exactly!")((StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset () && "Slice size should always match load size exactly!" ) ? static_cast<void> (0) : __assert_fail ("StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() && \"Slice size should always match load size exactly!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3835, __PRETTY_FUNCTION__)); | |||||
3836 | uint64_t BaseOffset = Offsets.S->beginOffset(); | |||||
3837 | assert(BaseOffset + StoreSize > BaseOffset &&((BaseOffset + StoreSize > BaseOffset && "Cannot represent alloca access size using 64-bit integers!" ) ? static_cast<void> (0) : __assert_fail ("BaseOffset + StoreSize > BaseOffset && \"Cannot represent alloca access size using 64-bit integers!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3838, __PRETTY_FUNCTION__)) | |||||
3838 | "Cannot represent alloca access size using 64-bit integers!")((BaseOffset + StoreSize > BaseOffset && "Cannot represent alloca access size using 64-bit integers!" ) ? static_cast<void> (0) : __assert_fail ("BaseOffset + StoreSize > BaseOffset && \"Cannot represent alloca access size using 64-bit integers!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3838, __PRETTY_FUNCTION__)); | |||||
3839 | ||||||
3840 | Value *LoadBasePtr = LI->getPointerOperand(); | |||||
3841 | Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand()); | |||||
3842 | ||||||
3843 | DEBUG(dbgs() << " Splitting store: " << *SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " Splitting store: " << *SI << "\n"; } } while (0); | |||||
3844 | ||||||
3845 | // Check whether we have an already split load. | |||||
3846 | auto SplitLoadsMapI = SplitLoadsMap.find(LI); | |||||
3847 | std::vector<LoadInst *> *SplitLoads = nullptr; | |||||
3848 | if (SplitLoadsMapI != SplitLoadsMap.end()) { | |||||
3849 | SplitLoads = &SplitLoadsMapI->second; | |||||
3850 | assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&((SplitLoads->size() == Offsets.Splits.size() + 1 && "Too few split loads for the number of splits in the store!" ) ? static_cast<void> (0) : __assert_fail ("SplitLoads->size() == Offsets.Splits.size() + 1 && \"Too few split loads for the number of splits in the store!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3851, __PRETTY_FUNCTION__)) | |||||
3851 | "Too few split loads for the number of splits in the store!")((SplitLoads->size() == Offsets.Splits.size() + 1 && "Too few split loads for the number of splits in the store!" ) ? static_cast<void> (0) : __assert_fail ("SplitLoads->size() == Offsets.Splits.size() + 1 && \"Too few split loads for the number of splits in the store!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3851, __PRETTY_FUNCTION__)); | |||||
3852 | } else { | |||||
3853 | DEBUG(dbgs() << " of load: " << *LI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " of load: " << *LI << "\n"; } } while (0); | |||||
3854 | } | |||||
3855 | ||||||
3856 | uint64_t PartOffset = 0, PartSize = Offsets.Splits.front(); | |||||
3857 | int Idx = 0, Size = Offsets.Splits.size(); | |||||
3858 | for (;;) { | |||||
3859 | auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); | |||||
3860 | auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); | |||||
3861 | ||||||
3862 | // Either lookup a split load or create one. | |||||
3863 | LoadInst *PLoad; | |||||
3864 | if (SplitLoads) { | |||||
3865 | PLoad = (*SplitLoads)[Idx]; | |||||
3866 | } else { | |||||
3867 | IRB.SetInsertPoint(BasicBlock::iterator(LI)); | |||||
3868 | PLoad = IRB.CreateAlignedLoad( | |||||
3869 | getAdjustedPtr(IRB, DL, LoadBasePtr, | |||||
3870 | APInt(DL.getPointerSizeInBits(), PartOffset), | |||||
3871 | PartPtrTy, LoadBasePtr->getName() + "."), | |||||
3872 | getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, | |||||
3873 | LI->getName()); | |||||
3874 | } | |||||
3875 | ||||||
3876 | // And store this partition. | |||||
3877 | IRB.SetInsertPoint(BasicBlock::iterator(SI)); | |||||
3878 | StoreInst *PStore = IRB.CreateAlignedStore( | |||||
3879 | PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, | |||||
3880 | APInt(DL.getPointerSizeInBits(), PartOffset), | |||||
3881 | PartPtrTy, StoreBasePtr->getName() + "."), | |||||
3882 | getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); | |||||
3883 | ||||||
3884 | // Now build a new slice for the alloca. | |||||
3885 | NewSlices.push_back( | |||||
3886 | Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, | |||||
3887 | &PStore->getOperandUse(PStore->getPointerOperandIndex()), | |||||
3888 | /*IsSplittable*/ false)); | |||||
3889 | DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " new slice [" << NewSlices .back().beginOffset() << ", " << NewSlices.back() .endOffset() << "): " << *PStore << "\n"; } } while (0) | |||||
3890 | << ", " << NewSlices.back().endOffset() << "): " << *PStoredo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " new slice [" << NewSlices .back().beginOffset() << ", " << NewSlices.back() .endOffset() << "): " << *PStore << "\n"; } } while (0) | |||||
3891 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " new slice [" << NewSlices .back().beginOffset() << ", " << NewSlices.back() .endOffset() << "): " << *PStore << "\n"; } } while (0); | |||||
3892 | if (!SplitLoads) { | |||||
3893 | DEBUG(dbgs() << " of split load: " << *PLoad << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " of split load: " << * PLoad << "\n"; } } while (0); | |||||
3894 | } | |||||
3895 | ||||||
3896 | // See if we've finished all the splits. | |||||
3897 | if (Idx >= Size) | |||||
3898 | break; | |||||
3899 | ||||||
3900 | // Setup the next partition. | |||||
3901 | PartOffset = Offsets.Splits[Idx]; | |||||
3902 | ++Idx; | |||||
3903 | PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset; | |||||
3904 | } | |||||
3905 | ||||||
3906 | // We want to immediately iterate on any allocas impacted by splitting | |||||
3907 | // this load, which is only relevant if it isn't a load of this alloca and | |||||
3908 | // thus we didn't already split the loads above. We also have to keep track | |||||
3909 | // of any promotable allocas we split loads on as they can no longer be | |||||
3910 | // promoted. | |||||
3911 | if (!SplitLoads) { | |||||
3912 | if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) { | |||||
3913 | assert(OtherAI != &AI && "We can't re-split our own alloca!")((OtherAI != &AI && "We can't re-split our own alloca!" ) ? static_cast<void> (0) : __assert_fail ("OtherAI != &AI && \"We can't re-split our own alloca!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3913, __PRETTY_FUNCTION__)); | |||||
3914 | ResplitPromotableAllocas.insert(OtherAI); | |||||
3915 | Worklist.insert(OtherAI); | |||||
3916 | } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>( | |||||
3917 | LoadBasePtr->stripInBoundsOffsets())) { | |||||
3918 | assert(OtherAI != &AI && "We can't re-split our own alloca!")((OtherAI != &AI && "We can't re-split our own alloca!" ) ? static_cast<void> (0) : __assert_fail ("OtherAI != &AI && \"We can't re-split our own alloca!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3918, __PRETTY_FUNCTION__)); | |||||
3919 | Worklist.insert(OtherAI); | |||||
3920 | } | |||||
3921 | } | |||||
3922 | ||||||
3923 | // Mark the original store as dead now that we've split it up and kill its | |||||
3924 | // slice. Note that we leave the original load in place unless this store | |||||
3925 | // was its ownly use. It may in turn be split up if it is an alloca load | |||||
3926 | // for some other alloca, but it may be a normal load. This may introduce | |||||
3927 | // redundant loads, but where those can be merged the rest of the optimizer | |||||
3928 | // should handle the merging, and this uncovers SSA splits which is more | |||||
3929 | // important. In practice, the original loads will almost always be fully | |||||
3930 | // split and removed eventually, and the splits will be merged by any | |||||
3931 | // trivial CSE, including instcombine. | |||||
3932 | if (LI->hasOneUse()) { | |||||
3933 | assert(*LI->user_begin() == SI && "Single use isn't this store!")((*LI->user_begin() == SI && "Single use isn't this store!" ) ? static_cast<void> (0) : __assert_fail ("*LI->user_begin() == SI && \"Single use isn't this store!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3933, __PRETTY_FUNCTION__)); | |||||
3934 | DeadInsts.insert(LI); | |||||
3935 | } | |||||
3936 | DeadInsts.insert(SI); | |||||
3937 | Offsets.S->kill(); | |||||
3938 | } | |||||
3939 | ||||||
  // Remove the killed slices that have been pre-split.
3941 | AS.erase(std::remove_if(AS.begin(), AS.end(), [](const Slice &S) { | |||||
3942 | return S.isDead(); | |||||
3943 | }), AS.end()); | |||||
3944 | ||||||
3945 | // Insert our new slices. This will sort and merge them into the sorted | |||||
3946 | // sequence. | |||||
3947 | AS.insert(NewSlices); | |||||
3948 | ||||||
3949 | DEBUG(dbgs() << " Pre-split slices:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << " Pre-split slices:\n"; } } while (0); | |||||
3950 | #ifndef NDEBUG | |||||
3951 | for (auto I = AS.begin(), E = AS.end(); I != E; ++I) | |||||
3952 | DEBUG(AS.print(dbgs(), I, " "))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { AS.print(dbgs(), I, " "); } } while (0); | |||||
3953 | #endif | |||||
3954 | ||||||
  // Finally, don't try to promote any allocas that now require re-splitting.
3956 | // They have already been added to the worklist above. | |||||
3957 | PromotableAllocas.erase( | |||||
3958 | std::remove_if( | |||||
3959 | PromotableAllocas.begin(), PromotableAllocas.end(), | |||||
3960 | [&](AllocaInst *AI) { return ResplitPromotableAllocas.count(AI); }), | |||||
3961 | PromotableAllocas.end()); | |||||
3962 | ||||||
3963 | return true; | |||||
3964 | } | |||||
3965 | ||||||
3966 | /// \brief Rewrite an alloca partition's users. | |||||
3967 | /// | |||||
3968 | /// This routine drives both of the rewriting goals of the SROA pass. It tries | |||||
3969 | /// to rewrite uses of an alloca partition to be conducive for SSA value | |||||
3970 | /// promotion. If the partition needs a new, more refined alloca, this will | |||||
3971 | /// build that new alloca, preserving as much type information as possible, and | |||||
3972 | /// rewrite the uses of the old alloca to point at the new one and have the | |||||
3973 | /// appropriate new offsets. It also evaluates how successful the rewrite was | |||||
3974 | /// at enabling promotion and if it was successful queues the alloca to be | |||||
3975 | /// promoted. | |||||
AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
                                   AllocaSlices::Partition &P) {
  // Try to compute a friendly type for this partition of the alloca. This
  // won't always succeed, in which case we fall back to a legal integer type
  // or an i8 array of an appropriate size.
  Type *SliceTy = nullptr;
  const DataLayout &DL = AI.getModule()->getDataLayout();
  // First choice: a type shared by all of this partition's uses, provided it
  // is large enough to cover the whole partition.
  if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
    if (DL.getTypeAllocSize(CommonUseTy) >= P.size())
      SliceTy = CommonUseTy;
  // Second choice: the sub-type of the original allocated type spanning
  // exactly this partition's byte range, if one exists.
  if (!SliceTy)
    if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
                                                 P.beginOffset(), P.size()))
      SliceTy = TypePartitionTy;
  // Third choice: a single legal integer of the partition's width. This is
  // also preferred over an integer-array type found above.
  if ((!SliceTy || (SliceTy->isArrayTy() &&
                    SliceTy->getArrayElementType()->isIntegerTy())) &&
      DL.isLegalInteger(P.size() * 8))
    SliceTy = Type::getIntNTy(*C, P.size() * 8);
  // Last resort: an i8 array of the partition's size.
  if (!SliceTy)
    SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
  assert(DL.getTypeAllocSize(SliceTy) >= P.size())((DL.getTypeAllocSize(SliceTy) >= P.size()) ? static_cast< void> (0) : __assert_fail ("DL.getTypeAllocSize(SliceTy) >= P.size()" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 3996, __PRETTY_FUNCTION__));

  bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);

  // Integer widening and vector promotion are mutually exclusive; only probe
  // for a viable vector type when integer widening is not possible.
  VectorType *VecTy =
      IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
  if (VecTy)
    SliceTy = VecTy;

  // Check for the case where we're going to rewrite to a new alloca of the
  // exact same type as the original, and with the same access offsets. In that
  // case, re-use the existing alloca, but still run through the rewriter to
  // perform phi and select speculation.
  AllocaInst *NewAI;
  if (SliceTy == AI.getAllocatedType()) {
    assert(P.beginOffset() == 0 &&((P.beginOffset() == 0 && "Non-zero begin offset but same alloca type" ) ? static_cast<void> (0) : __assert_fail ("P.beginOffset() == 0 && \"Non-zero begin offset but same alloca type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 4012, __PRETTY_FUNCTION__))
           "Non-zero begin offset but same alloca type")((P.beginOffset() == 0 && "Non-zero begin offset but same alloca type" ) ? static_cast<void> (0) : __assert_fail ("P.beginOffset() == 0 && \"Non-zero begin offset but same alloca type\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 4012, __PRETTY_FUNCTION__));
    NewAI = &AI;
    // FIXME: We should be able to bail at this point with "nothing changed".
    // FIXME: We might want to defer PHI speculation until after here.
    // FIXME: return nullptr;
  } else {
    unsigned Alignment = AI.getAlignment();
    if (!Alignment) {
      // The minimum alignment which users can rely on when the explicit
      // alignment is omitted or zero is that required by the ABI for this
      // type.
      Alignment = DL.getABITypeAlignment(AI.getAllocatedType());
    }
    // The partition may begin at a non-zero offset within the original
    // alloca; the new alloca's alignment can only be the original alignment
    // reduced by that offset.
    Alignment = MinAlign(Alignment, P.beginOffset());
    // If we will get at least this much alignment from the type alone, leave
    // the alloca's alignment unconstrained.
    if (Alignment <= DL.getABITypeAlignment(SliceTy))
      Alignment = 0;
    NewAI = new AllocaInst(
        SliceTy, nullptr, Alignment,
        AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
    ++NumNewAllocas;
  }

  DEBUG(dbgs() << "Rewriting alloca partition "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset() << "," << P.endOffset () << ") to: " << *NewAI << "\n"; } } while (0)
               << "[" << P.beginOffset() << "," << P.endOffset()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset() << "," << P.endOffset () << ") to: " << *NewAI << "\n"; } } while (0)
               << ") to: " << *NewAI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset() << "," << P.endOffset () << ") to: " << *NewAI << "\n"; } } while (0);

  // Track the high watermark on the worklist as it is only relevant for
  // promoted allocas. We will reset it to this point if the alloca is not in
  // fact scheduled for promotion.
  unsigned PPWOldSize = PostPromotionWorklist.size();
  unsigned NumUses = 0;
  SmallPtrSet<PHINode *, 8> PHIUsers;
  SmallPtrSet<SelectInst *, 8> SelectUsers;

  // Rewrite every slice touching this partition (both the split tails that
  // overlap it and the slices fully contained in it) against the new alloca.
  // The rewriter reports, per slice, whether the result remains promotable.
  AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
                               P.endOffset(), IsIntegerPromotable, VecTy,
                               PHIUsers, SelectUsers);
  bool Promotable = true;
  for (Slice *S : P.splitSliceTails()) {
    Promotable &= Rewriter.visit(S);
    ++NumUses;
  }
  for (Slice &S : P) {
    Promotable &= Rewriter.visit(&S);
    ++NumUses;
  }

  NumAllocaPartitionUses += NumUses;
  MaxUsesPerAllocaPartition =
      std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);

  // Now that we've processed all the slices in the new partition, check if any
  // PHIs or Selects would block promotion. A single unsafe PHI or select
  // makes the whole partition non-promotable, so we can stop early and clear
  // the user sets (they are only consulted when Promotable is true).
  for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
                                            E = PHIUsers.end();
       I != E; ++I)
    if (!isSafePHIToSpeculate(**I)) {
      Promotable = false;
      PHIUsers.clear();
      SelectUsers.clear();
      break;
    }
  for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
                                               E = SelectUsers.end();
       I != E; ++I)
    if (!isSafeSelectToSpeculate(**I)) {
      Promotable = false;
      PHIUsers.clear();
      SelectUsers.clear();
      break;
    }

  if (Promotable) {
    if (PHIUsers.empty() && SelectUsers.empty()) {
      // Promote the alloca.
      PromotableAllocas.push_back(NewAI);
    } else {
      // If we have either PHIs or Selects to speculate, add them to those
      // worklists and re-queue the new alloca so that we promote it on the
      // next iteration.
      for (PHINode *PHIUser : PHIUsers)
        SpeculatablePHIs.insert(PHIUser);
      for (SelectInst *SelectUser : SelectUsers)
        SpeculatableSelects.insert(SelectUser);
      Worklist.insert(NewAI);
    }
  } else {
    // If we can't promote the alloca, iterate on it to check for new
    // refinements exposed by splitting the current alloca. Don't iterate on an
    // alloca which didn't actually change and didn't get promoted.
    if (NewAI != &AI)
      Worklist.insert(NewAI);

    // Drop any post-promotion work items if promotion didn't happen.
    while (PostPromotionWorklist.size() > PPWOldSize)
      PostPromotionWorklist.pop_back();
  }

  return NewAI;
}
4114 | ||||||
4115 | /// \brief Walks the slices of an alloca and form partitions based on them, | |||||
4116 | /// rewriting each of their uses. | |||||
bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
  // Nothing to do when the alloca has no recorded slices.
  if (AS.begin() == AS.end())
    return false;

  unsigned NumPartitions = 0;
  bool Changed = false;
  const DataLayout &DL = AI.getModule()->getDataLayout();

  // First try to pre-split loads and stores.
  Changed |= presplitLoadsAndStores(AI, AS);

  // Now that we have identified any pre-splitting opportunities, mark any
  // splittable (non-whole-alloca) loads and stores as unsplittable. If we fail
  // to split these during pre-splitting, we want to force them to be
  // rewritten into a partition.
  bool IsSorted = true;
  for (Slice &S : AS) {
    if (!S.isSplittable())
      continue;
    // FIXME: We currently leave whole-alloca splittable loads and stores. This
    // used to be the only splittable loads and stores and we need to be
    // confident that the above handling of splittable loads and stores is
    // completely sufficient before we forcibly disable the remaining handling.
    if (S.beginOffset() == 0 &&
        S.endOffset() >= DL.getTypeAllocSize(AI.getAllocatedType()))
      continue;
    if (isa<LoadInst>(S.getUse()->getUser()) ||
        isa<StoreInst>(S.getUse()->getUser())) {
      S.makeUnsplittable();
      // Flipping splittability changes a slice's sort key, so re-sort below.
      IsSorted = false;
    }
  }
  if (!IsSorted)
    std::sort(AS.begin(), AS.end());

  /// \brief Describes the allocas introduced by rewritePartition
  /// in order to migrate the debug info.
  struct Piece {
    AllocaInst *Alloca;
    uint64_t Offset;
    uint64_t Size;
    Piece(AllocaInst *AI, uint64_t O, uint64_t S)
      : Alloca(AI), Offset(O), Size(S) {}
  };
  SmallVector<Piece, 4> Pieces;

  // Rewrite each partition, remembering any freshly created allocas (with
  // their bit offset and size within the original alloca) for the debug-info
  // migration below.
  for (auto &P : AS.partitions()) {
    if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
      Changed = true;
      if (NewAI != &AI) {
        uint64_t SizeOfByte = 8;
        uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType());
        // Don't include any padding.
        uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
        Pieces.push_back(Piece(NewAI, P.beginOffset() * SizeOfByte, Size));
      }
    }
    ++NumPartitions;
  }

  NumAllocaPartitions += NumPartitions;
  MaxPartitionsPerAlloca =
      std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);

  // Migrate debug information from the old alloca to the new alloca(s)
  // and the individual partitions.
  if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) {
    auto *Var = DbgDecl->getVariable();
    auto *Expr = DbgDecl->getExpression();
    DIBuilder DIB(*AI.getParent()->getParent()->getParent(),
                  /*AllowUnresolved*/ false);
    bool IsSplit = Pieces.size() > 1;
    // NOTE(review): the loop variable shadows the local 'Piece' struct name;
    // each iteration works on a by-value copy of one recorded piece.
    for (auto Piece : Pieces) {
      // Create a piece expression describing the new partition or reuse AI's
      // expression if there is only one partition.
      auto *PieceExpr = Expr;
      if (IsSplit || Expr->isBitPiece()) {
        // If this alloca is already a scalar replacement of a larger aggregate,
        // Piece.Offset describes the offset inside the scalar.
        uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;
        uint64_t Start = Offset + Piece.Offset;
        uint64_t Size = Piece.Size;
        if (Expr->isBitPiece()) {
          uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize();
          if (Start >= AbsEnd)
            // No need to describe a SROAed padding.
            continue;
          // Clamp to the extent actually covered by the original expression.
          Size = std::min(Size, AbsEnd - Start);
        }
        PieceExpr = DIB.createBitPieceExpression(Start, Size);
      }

      // Remove any existing dbg.declare intrinsic describing the same alloca.
      if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca))
        OldDDI->eraseFromParent();

      DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(),
                        &AI);
    }
  }
  return Changed;
}
4220 | ||||||
4221 | /// \brief Clobber a use with undef, deleting the used value if it becomes dead. | |||||
4222 | void SROA::clobberUse(Use &U) { | |||||
4223 | Value *OldV = U; | |||||
4224 | // Replace the use with an undef value. | |||||
4225 | U = UndefValue::get(OldV->getType()); | |||||
4226 | ||||||
4227 | // Check for this making an instruction dead. We have to garbage collect | |||||
4228 | // all the dead instructions to ensure the uses of any alloca end up being | |||||
4229 | // minimal. | |||||
4230 | if (Instruction *OldI = dyn_cast<Instruction>(OldV)) | |||||
4231 | if (isInstructionTriviallyDead(OldI)) { | |||||
4232 | DeadInsts.insert(OldI); | |||||
4233 | } | |||||
4234 | } | |||||
4235 | ||||||
4236 | /// \brief Analyze an alloca for SROA. | |||||
4237 | /// | |||||
4238 | /// This analyzes the alloca to ensure we can reason about it, builds | |||||
4239 | /// the slices of the alloca, and then hands it off to be split and | |||||
4240 | /// rewritten as needed. | |||||
bool SROA::runOnAlloca(AllocaInst &AI) {
  DEBUG(dbgs() << "SROA alloca: " << AI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "SROA alloca: " << AI << "\n"; } } while (0);
  ++NumAllocasAnalyzed;

  // Special case dead allocas, as they're trivial.
  if (AI.use_empty()) {
    AI.eraseFromParent();
    return true;
  }
  const DataLayout &DL = AI.getModule()->getDataLayout();

  // Skip alloca forms that this analysis can't handle: dynamic array
  // allocations, unsized types, and zero-size allocations.
  if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
      DL.getTypeAllocSize(AI.getAllocatedType()) == 0)
    return false;

  bool Changed = false;

  // First, split any FCA loads and stores touching this alloca to promote
  // better splitting and promotion opportunities.
  AggLoadStoreRewriter AggRewriter(DL);
  Changed |= AggRewriter.rewrite(AI);

  // Build the slices using a recursive instruction-visiting builder.
  AllocaSlices AS(DL, AI);
  DEBUG(AS.print(dbgs()))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { AS.print(dbgs()); } } while (0);
  // If a pointer to the alloca escapes, we cannot reason about all its uses
  // and must leave it alone.
  if (AS.isEscaped())
    return Changed;

  // Delete all the dead users of this alloca before splitting and rewriting it.
  for (Instruction *DeadUser : AS.getDeadUsers()) {
    // Free up everything used by this instruction.
    for (Use &DeadOp : DeadUser->operands())
      clobberUse(DeadOp);

    // Now replace the uses of this instruction.
    DeadUser->replaceAllUsesWith(UndefValue::get(DeadUser->getType()));

    // And mark it for deletion.
    DeadInsts.insert(DeadUser);
    Changed = true;
  }
  for (Use *DeadOp : AS.getDeadOperands()) {
    clobberUse(*DeadOp);
    Changed = true;
  }

  // No slices to split. Leave the dead alloca for a later pass to clean up.
  if (AS.begin() == AS.end())
    return Changed;

  Changed |= splitAlloca(AI, AS);

  // splitAlloca queues PHIs and selects whose loads can be speculated;
  // perform that speculation now so the allocas become promotable.
  DEBUG(dbgs() << "  Speculating PHIs\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "  Speculating PHIs\n"; } } while (0);
  while (!SpeculatablePHIs.empty())
    speculatePHINodeLoads(*SpeculatablePHIs.pop_back_val());

  DEBUG(dbgs() << "  Speculating Selects\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "  Speculating Selects\n"; } } while (0);
  while (!SpeculatableSelects.empty())
    speculateSelectInstLoads(*SpeculatableSelects.pop_back_val());

  return Changed;
}
4304 | ||||||
4305 | /// \brief Delete the dead instructions accumulated in this run. | |||||
4306 | /// | |||||
4307 | /// Recursively deletes the dead instructions we've accumulated. This is done | |||||
4308 | /// at the very end to maximize locality of the recursive delete and to | |||||
4309 | /// minimize the problems of invalidated instruction pointers as such pointers | |||||
4310 | /// are used heavily in the intermediate stages of the algorithm. | |||||
4311 | /// | |||||
4312 | /// We also record the alloca instructions deleted here so that they aren't | |||||
4313 | /// subsequently handed to mem2reg to promote. | |||||
void SROA::deleteDeadInstructions(
    SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
  while (!DeadInsts.empty()) {
    Instruction *I = DeadInsts.pop_back_val();
    DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "Deleting dead instruction: " << *I << "\n"; } } while (0);

    // Sever every remaining use before touching the operands.
    I->replaceAllUsesWith(UndefValue::get(I->getType()));

    for (Use &Operand : I->operands())
      if (Instruction *U = dyn_cast<Instruction>(Operand)) {
        // Zero out the operand and see if it becomes trivially dead.
        Operand = nullptr;
        if (isInstructionTriviallyDead(U))
          DeadInsts.insert(U);
      }

    // Record deleted allocas so they are not handed to mem2reg later, and
    // drop any dbg.declare that still refers to them.
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
      DeletedAllocas.insert(AI);
      if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(AI))
        DbgDecl->eraseFromParent();
    }

    ++NumDeleted;
    I->eraseFromParent();
  }
}
4340 | ||||||
4341 | static void enqueueUsersInWorklist(Instruction &I, | |||||
4342 | SmallVectorImpl<Instruction *> &Worklist, | |||||
4343 | SmallPtrSetImpl<Instruction *> &Visited) { | |||||
4344 | for (User *U : I.users()) | |||||
4345 | if (Visited.insert(cast<Instruction>(U)).second) | |||||
4346 | Worklist.push_back(cast<Instruction>(U)); | |||||
4347 | } | |||||
4348 | ||||||
4349 | /// \brief Promote the allocas, using the best available technique. | |||||
4350 | /// | |||||
4351 | /// This attempts to promote whatever allocas have been identified as viable in | |||||
4352 | /// the PromotableAllocas list. If that list is empty, there is nothing to do. | |||||
4353 | /// If there is a domtree available, we attempt to promote using the full power | |||||
4354 | /// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is | |||||
4355 | /// based on the SSAUpdater utilities. This function returns whether any | |||||
4356 | /// promotion occurred. | |||||
4357 | bool SROA::promoteAllocas(Function &F) { | |||||
4358 | if (PromotableAllocas.empty()) | |||||
4359 | return false; | |||||
4360 | ||||||
4361 | NumPromoted += PromotableAllocas.size(); | |||||
4362 | ||||||
4363 | if (DT && !ForceSSAUpdater) { | |||||
4364 | DEBUG(dbgs() << "Promoting allocas with mem2reg...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "Promoting allocas with mem2reg...\n" ; } } while (0); | |||||
4365 | PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC); | |||||
4366 | PromotableAllocas.clear(); | |||||
4367 | return true; | |||||
4368 | } | |||||
4369 | ||||||
4370 | DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("sroa")) { dbgs() << "Promoting allocas with SSAUpdater...\n" ; } } while (0); | |||||
4371 | SSAUpdater SSA; | |||||
4372 | DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false); | |||||
4373 | SmallVector<Instruction *, 64> Insts; | |||||
4374 | ||||||
4375 | // We need a worklist to walk the uses of each alloca. | |||||
4376 | SmallVector<Instruction *, 8> Worklist; | |||||
4377 | SmallPtrSet<Instruction *, 8> Visited; | |||||
4378 | SmallVector<Instruction *, 32> DeadInsts; | |||||
4379 | ||||||
4380 | for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) { | |||||
4381 | AllocaInst *AI = PromotableAllocas[Idx]; | |||||
4382 | Insts.clear(); | |||||
4383 | Worklist.clear(); | |||||
4384 | Visited.clear(); | |||||
4385 | ||||||
4386 | enqueueUsersInWorklist(*AI, Worklist, Visited); | |||||
4387 | ||||||
4388 | while (!Worklist.empty()) { | |||||
4389 | Instruction *I = Worklist.pop_back_val(); | |||||
4390 | ||||||
4391 | // FIXME: Currently the SSAUpdater infrastructure doesn't reason about | |||||
4392 | // lifetime intrinsics and so we strip them (and the bitcasts+GEPs | |||||
4393 | // leading to them) here. Eventually it should use them to optimize the | |||||
4394 | // scalar values produced. | |||||
4395 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { | |||||
4396 | assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||((II->getIntrinsicID() == Intrinsic::lifetime_start || II-> getIntrinsicID() == Intrinsic::lifetime_end) ? static_cast< void> (0) : __assert_fail ("II->getIntrinsicID() == Intrinsic::lifetime_start || II->getIntrinsicID() == Intrinsic::lifetime_end" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 4397, __PRETTY_FUNCTION__)) | |||||
4397 | II->getIntrinsicID() == Intrinsic::lifetime_end)((II->getIntrinsicID() == Intrinsic::lifetime_start || II-> getIntrinsicID() == Intrinsic::lifetime_end) ? static_cast< void> (0) : __assert_fail ("II->getIntrinsicID() == Intrinsic::lifetime_start || II->getIntrinsicID() == Intrinsic::lifetime_end" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 4397, __PRETTY_FUNCTION__)); | |||||
4398 | II->eraseFromParent(); | |||||
4399 | continue; | |||||
4400 | } | |||||
4401 | ||||||
4402 | // Push the loads and stores we find onto the list. SROA will already | |||||
4403 | // have validated that all loads and stores are viable candidates for | |||||
4404 | // promotion. | |||||
4405 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) { | |||||
4406 | assert(LI->getType() == AI->getAllocatedType())((LI->getType() == AI->getAllocatedType()) ? static_cast <void> (0) : __assert_fail ("LI->getType() == AI->getAllocatedType()" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 4406, __PRETTY_FUNCTION__)); | |||||
4407 | Insts.push_back(LI); | |||||
4408 | continue; | |||||
4409 | } | |||||
4410 | if (StoreInst *SI = dyn_cast<StoreInst>(I)) { | |||||
4411 | assert(SI->getValueOperand()->getType() == AI->getAllocatedType())((SI->getValueOperand()->getType() == AI->getAllocatedType ()) ? static_cast<void> (0) : __assert_fail ("SI->getValueOperand()->getType() == AI->getAllocatedType()" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Transforms/Scalar/SROA.cpp" , 4411, __PRETTY_FUNCTION__)); | |||||
4412 | Insts.push_back(SI); | |||||
4413 | continue; | |||||
4414 | } | |||||
4415 | ||||||
4416 | // For everything else, we know that only no-op bitcasts and GEPs will | |||||
4417 | // make it this far, just recurse through them and recall them for later | |||||
4418 | // removal. | |||||
4419 | DeadInsts.push_back(I); | |||||
4420 | enqueueUsersInWorklist(*I, Worklist, Visited); | |||||
4421 | } | |||||
4422 | AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts); | |||||
4423 | while (!DeadInsts.empty()) | |||||
4424 | DeadInsts.pop_back_val()->eraseFromParent(); | |||||
4425 | AI->eraseFromParent(); | |||||
4426 | } | |||||
4427 | ||||||
4428 | PromotableAllocas.clear(); | |||||
4429 | return true; | |||||
4430 | } | |||||
4431 | ||||||
bool SROA::runOnFunction(Function &F) {
  // Respect the optnone attribute: this pass must not transform such
  // functions at all.
  if (skipOptnoneFunction(F))
    return false;

  DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
  C = &F.getContext();
  // The dominator tree is fetched only if available: when DT is null,
  // promotion falls back to the SSAUpdater-based path instead of
  // PromoteMemToReg (see promoteAllocas).
  DominatorTreeWrapperPass *DTWP =
      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  DT = DTWP ? &DTWP->getDomTree() : nullptr;
  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  // Seed the worklist with every alloca in the entry block. E is captured
  // once as std::prev(end()) so the block terminator is excluded from the
  // scan; allocas created later are reached through the worklists below,
  // not by re-scanning the block.
  BasicBlock &EntryBB = F.getEntryBlock();
  for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
       I != E; ++I) {
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      Worklist.insert(AI);
  }

  bool Changed = false;
  // A set of deleted alloca instruction pointers which should be removed from
  // the list of promotable allocas.
  SmallPtrSet<AllocaInst *, 4> DeletedAllocas;

  // Iterate to a fixed point: rewriting one alloca can create new allocas
  // that are queued (via PostPromotionWorklist) for another round.
  do {
    while (!Worklist.empty()) {
      Changed |= runOnAlloca(*Worklist.pop_back_val());
      // runOnAlloca may have queued instructions (including allocas) for
      // deletion; actually free them now and collect the freed allocas.
      deleteDeadInstructions(DeletedAllocas);

      // Remove the deleted allocas from various lists so that we don't try to
      // continue processing them. This purge is essential: any stale
      // AllocaInst* left in Worklist, PostPromotionWorklist, or
      // PromotableAllocas would be a use-after-free when later dereferenced.
      if (!DeletedAllocas.empty()) {
        auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
        Worklist.remove_if(IsInSet);
        PostPromotionWorklist.remove_if(IsInSet);
        // PromotableAllocas is a plain vector, so use the erase-remove idiom
        // rather than the set-like remove_if above.
        PromotableAllocas.erase(std::remove_if(PromotableAllocas.begin(),
                                               PromotableAllocas.end(),
                                               IsInSet),
                                PromotableAllocas.end());
        DeletedAllocas.clear();
      }
    }

    // Promote everything accumulated in PromotableAllocas this round.
    Changed |= promoteAllocas(F);

    // Restart with whatever promotion itself queued for reprocessing.
    Worklist = PostPromotionWorklist;
    PostPromotionWorklist.clear();
  } while (!Worklist.empty());

  return Changed;
}
4482 | ||||||
void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
  // The assumption cache is always needed; it is handed to PromoteMemToReg
  // during promotion (see promoteAllocas).
  AU.addRequired<AssumptionCacheTracker>();
  // The dominator tree is a hard requirement only when this pass instance
  // was configured to use mem2reg-style promotion; otherwise runOnFunction
  // merely fetches it if available and falls back to SSAUpdater.
  if (RequiresDomTree)
    AU.addRequired<DominatorTreeWrapperPass>();
  // SROA rewrites instructions but never adds/removes basic blocks or edges,
  // so declare the CFG preserved to keep dependent analyses alive.
  AU.setPreservesCFG();
}