Bug Summary

File: build/source/llvm/lib/Transforms/Scalar/SROA.cpp
Warning: line 3070, column 54
Called C++ object pointer is null
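
The warning class here ("Called C++ object pointer is null") is raised when a member function is invoked through a pointer that the analyzer can prove is null on at least one path. A minimal, hypothetical reduction of that pattern follows; the names are invented for illustration and do not correspond to the SROA.cpp code at line 3070:

    struct Ty {
      bool isIntegerTy() const { return true; }
    };

    // If Known is true while T is null, the call below goes through a null
    // object pointer, which is exactly what the checker reports.
    bool mayBeInteger(const Ty *T, bool Known) {
      if (!T && !Known)
        return false;
      return T->isIntegerTy(); // warning: Called C++ object pointer is null
    }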

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name SROA.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm -resource-dir /usr/lib/llvm-16/lib/clang/16 -I lib/Transforms/Scalar -I /build/source/llvm/lib/Transforms/Scalar -I include -I /build/source/llvm/include -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm=build-llvm -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm=build-llvm -fcoverage-prefix-map=/build/source/= -source-date-epoch 1670930111 -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-12-13-140357-15994-1 -x c++ /build/source/llvm/lib/Transforms/Scalar/SROA.cpp
1//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This transformation implements the well known scalar replacement of
10/// aggregates transformation. It tries to identify promotable elements of an
11/// aggregate alloca, and promote them to registers. It will also try to
12/// convert uses of an element (or set of elements) of an alloca into a vector
13/// or bitfield-style integer scalar if appropriate.
14///
15/// It works to do this with minimal slicing of the alloca so that regions
16/// which are merely transferred in and out of external memory remain unchanged
17/// and are not decomposed to scalar code.
18///
19/// Because this also performs alloca promotion, it can be thought of as also
20/// serving the purpose of SSA formation. The algorithm iterates on the
21/// function until all opportunities for promotion have been realized.
22///
23//===----------------------------------------------------------------------===//
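
As a source-level intuition for the transformation described above (an analogy only, not LLVM IR and not code from this file): SROA turns an aggregate local whose fields are accessed independently into separate scalars, which promotion can then lift into SSA values.

    // Before (conceptually): the pair occupies a stack slot (an alloca).
    int beforeSROA(int A, int B) {
      struct { int X, Y; } P;
      P.X = A;
      P.Y = B;
      return P.X + P.Y;
    }

    // After (conceptually): the aggregate is replaced by two independent
    // scalars, which mem2reg-style promotion turns into registers.
    int afterSROA(int A, int B) {
      int X = A;
      int Y = B;
      return X + Y;
    }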
24
25#include "llvm/Transforms/Scalar/SROA.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/PointerIntPair.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SetVector.h"
32#include "llvm/ADT/SmallBitVector.h"
33#include "llvm/ADT/SmallPtrSet.h"
34#include "llvm/ADT/SmallVector.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
37#include "llvm/ADT/Twine.h"
38#include "llvm/ADT/iterator.h"
39#include "llvm/ADT/iterator_range.h"
40#include "llvm/Analysis/AssumptionCache.h"
41#include "llvm/Analysis/DomTreeUpdater.h"
42#include "llvm/Analysis/GlobalsModRef.h"
43#include "llvm/Analysis/Loads.h"
44#include "llvm/Analysis/PtrUseVisitor.h"
45#include "llvm/Config/llvm-config.h"
46#include "llvm/IR/BasicBlock.h"
47#include "llvm/IR/Constant.h"
48#include "llvm/IR/ConstantFolder.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DIBuilder.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugInfo.h"
53#include "llvm/IR/DebugInfoMetadata.h"
54#include "llvm/IR/DerivedTypes.h"
55#include "llvm/IR/Dominators.h"
56#include "llvm/IR/Function.h"
57#include "llvm/IR/GetElementPtrTypeIterator.h"
58#include "llvm/IR/GlobalAlias.h"
59#include "llvm/IR/IRBuilder.h"
60#include "llvm/IR/InstVisitor.h"
61#include "llvm/IR/Instruction.h"
62#include "llvm/IR/Instructions.h"
63#include "llvm/IR/IntrinsicInst.h"
64#include "llvm/IR/LLVMContext.h"
65#include "llvm/IR/Metadata.h"
66#include "llvm/IR/Module.h"
67#include "llvm/IR/Operator.h"
68#include "llvm/IR/PassManager.h"
69#include "llvm/IR/Type.h"
70#include "llvm/IR/Use.h"
71#include "llvm/IR/User.h"
72#include "llvm/IR/Value.h"
73#include "llvm/InitializePasses.h"
74#include "llvm/Pass.h"
75#include "llvm/Support/Casting.h"
76#include "llvm/Support/CommandLine.h"
77#include "llvm/Support/Compiler.h"
78#include "llvm/Support/Debug.h"
79#include "llvm/Support/ErrorHandling.h"
80#include "llvm/Support/raw_ostream.h"
81#include "llvm/Transforms/Scalar.h"
82#include "llvm/Transforms/Utils/BasicBlockUtils.h"
83#include "llvm/Transforms/Utils/Local.h"
84#include "llvm/Transforms/Utils/PromoteMemToReg.h"
85#include <algorithm>
86#include <cassert>
87#include <cstddef>
88#include <cstdint>
89#include <cstring>
90#include <iterator>
91#include <string>
92#include <tuple>
93#include <utility>
94#include <vector>
95
96using namespace llvm;
97using namespace llvm::sroa;
98
99#define DEBUG_TYPE "sroa"
100
101STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
102STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
103STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
104STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
105STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
106STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
107STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
108STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
109STATISTIC(NumLoadsPredicated,
110 "Number of loads rewritten into predicated loads to allow promotion");
111STATISTIC(
112 NumStoresPredicated,
113 "Number of stores rewritten into predicated loads to allow promotion");
114STATISTIC(NumDeleted, "Number of instructions deleted");
115STATISTIC(NumVectorized, "Number of vectorized aggregates");
116
117/// Hidden option to experiment with completely strict handling of inbounds
118/// GEPs.
119static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
120 cl::Hidden);
121namespace {
122
123/// A custom IRBuilder inserter which prefixes all names, but only in
124/// Assert builds.
125class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
126 std::string Prefix;
127
128 Twine getNameWithPrefix(const Twine &Name) const {
129 return Name.isTriviallyEmpty() ? Name : Prefix + Name;
130 }
131
132public:
133 void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
134
135 void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
136 BasicBlock::iterator InsertPt) const override {
137 IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name), BB,
138 InsertPt);
139 }
140};
141
142/// Provide a type for IRBuilder that drops names in release builds.
143using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
144
145/// A used slice of an alloca.
146///
147/// This structure represents a slice of an alloca used by some instruction. It
148/// stores both the begin and end offsets of this use, a pointer to the use
149/// itself, and a flag indicating whether we can classify the use as splittable
150/// or not when forming partitions of the alloca.
151class Slice {
152 /// The beginning offset of the range.
153 uint64_t BeginOffset = 0;
154
155 /// The ending offset, not included in the range.
156 uint64_t EndOffset = 0;
157
158 /// Storage for both the use of this slice and whether it can be
159 /// split.
160 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
161
162public:
163 Slice() = default;
164
165 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
166 : BeginOffset(BeginOffset), EndOffset(EndOffset),
167 UseAndIsSplittable(U, IsSplittable) {}
168
169 uint64_t beginOffset() const { return BeginOffset; }
170 uint64_t endOffset() const { return EndOffset; }
171
172 bool isSplittable() const { return UseAndIsSplittable.getInt(); }
173 void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
174
175 Use *getUse() const { return UseAndIsSplittable.getPointer(); }
176
177 bool isDead() const { return getUse() == nullptr; }
178 void kill() { UseAndIsSplittable.setPointer(nullptr); }
179
180 /// Support for ordering ranges.
181 ///
182 /// This provides an ordering over ranges such that start offsets are
183 /// always increasing, and within equal start offsets, the end offsets are
184 /// decreasing. Thus the spanning range comes first in a cluster with the
185 /// same start position.
186 bool operator<(const Slice &RHS) const {
187 if (beginOffset() < RHS.beginOffset())
188 return true;
189 if (beginOffset() > RHS.beginOffset())
190 return false;
191 if (isSplittable() != RHS.isSplittable())
192 return !isSplittable();
193 if (endOffset() > RHS.endOffset())
194 return true;
195 return false;
196 }
197
198 /// Support comparison with a single offset to allow binary searches.
199 friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
200 uint64_t RHSOffset) {
201 return LHS.beginOffset() < RHSOffset;
202 }
203 friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
204 const Slice &RHS) {
205 return LHSOffset < RHS.beginOffset();
206 }
207
208 bool operator==(const Slice &RHS) const {
209 return isSplittable() == RHS.isSplittable() &&
210 beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
211 }
212 bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
213};
214
215} // end anonymous namespace
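
To make the ordering defined by Slice::operator< above concrete, here is a standalone mock of the same comparator on a plain struct. This is an illustrative sketch only; MockSlice, sliceLess, and demoSliceOrdering are invented names and not part of SROA.cpp, and the sketch relies on <algorithm>, <cstdint>, and <vector>, which this file already includes.

    struct MockSlice { uint64_t Begin, End; bool Splittable; };

    // Mirrors Slice::operator<: increasing begin offsets, unsplittable slices
    // before splittable ones at equal begin, and the widest slice first.
    static bool sliceLess(const MockSlice &L, const MockSlice &R) {
      if (L.Begin != R.Begin)
        return L.Begin < R.Begin;
      if (L.Splittable != R.Splittable)
        return !L.Splittable;
      return L.End > R.End;
    }

    void demoSliceOrdering() {
      std::vector<MockSlice> S = {{0, 4, true}, {0, 16, true}, {0, 8, false}};
      std::sort(S.begin(), S.end(), sliceLess);
      // Order after sorting: {0,8,false}, {0,16,true}, {0,4,true}.
    }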
216
217/// Representation of the alloca slices.
218///
219/// This class represents the slices of an alloca which are formed by its
220/// various uses. If a pointer escapes, we can't fully build a representation
221/// for the slices used and we reflect that in this structure. The uses are
222/// stored, sorted by increasing beginning offset and with unsplittable slices
223/// starting at a particular offset before splittable slices.
224class llvm::sroa::AllocaSlices {
225public:
226 /// Construct the slices of a particular alloca.
227 AllocaSlices(const DataLayout &DL, AllocaInst &AI);
228
229 /// Test whether a pointer to the allocation escapes our analysis.
230 ///
231 /// If this is true, the slices are never fully built and should be
232 /// ignored.
233 bool isEscaped() const { return PointerEscapingInstr; }
234
235 /// Support for iterating over the slices.
236 /// @{
237 using iterator = SmallVectorImpl<Slice>::iterator;
238 using range = iterator_range<iterator>;
239
240 iterator begin() { return Slices.begin(); }
241 iterator end() { return Slices.end(); }
242
243 using const_iterator = SmallVectorImpl<Slice>::const_iterator;
244 using const_range = iterator_range<const_iterator>;
245
246 const_iterator begin() const { return Slices.begin(); }
247 const_iterator end() const { return Slices.end(); }
248 /// @}
249
250 /// Erase a range of slices.
251 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
252
253 /// Insert new slices for this alloca.
254 ///
255 /// This moves the slices into the alloca's slices collection, and re-sorts
256 /// everything so that the usual ordering properties of the alloca's slices
257 /// hold.
258 void insert(ArrayRef<Slice> NewSlices) {
259 int OldSize = Slices.size();
260 Slices.append(NewSlices.begin(), NewSlices.end());
261 auto SliceI = Slices.begin() + OldSize;
262 llvm::sort(SliceI, Slices.end());
263 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
264 }
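
The insert() routine above keeps Slices globally sorted without re-sorting everything: it sorts only the newly appended tail and then merges it with the already-sorted prefix. A minimal sketch of the same idiom on integers (appendSorted is an invented name for illustration; <algorithm> and <vector> are already included in this file):

    void appendSorted(std::vector<int> &Sorted, const std::vector<int> &New) {
      size_t OldSize = Sorted.size();
      Sorted.insert(Sorted.end(), New.begin(), New.end());
      auto Mid = Sorted.begin() + OldSize;
      std::sort(Mid, Sorted.end());                          // sort the new tail only
      std::inplace_merge(Sorted.begin(), Mid, Sorted.end()); // merge with sorted prefix
    }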
265
266 // Forward declare the iterator and range accessor for walking the
267 // partitions.
268 class partition_iterator;
269 iterator_range<partition_iterator> partitions();
270
271 /// Access the dead users for this alloca.
272 ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
273
274 /// Access Uses that should be dropped if the alloca is promotable.
275 ArrayRef<Use *> getDeadUsesIfPromotable() const {
276 return DeadUseIfPromotable;
277 }
278
279 /// Access the dead operands referring to this alloca.
280 ///
281 /// These are operands which cannot actually be used to refer to the
282 /// alloca as they are outside its range and the user doesn't correct for
283 /// that. These mostly consist of PHI node inputs and the like which we just
284 /// need to replace with undef.
285 ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
286
287#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
288 void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
289 void printSlice(raw_ostream &OS, const_iterator I,
290 StringRef Indent = " ") const;
291 void printUse(raw_ostream &OS, const_iterator I,
292 StringRef Indent = " ") const;
293 void print(raw_ostream &OS) const;
294 void dump(const_iterator I) const;
295 void dump() const;
296#endif
297
298private:
299 template <typename DerivedT, typename RetT = void> class BuilderBase;
300 class SliceBuilder;
301
302 friend class AllocaSlices::SliceBuilder;
303
304#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
305 /// Handle to alloca instruction to simplify method interfaces.
306 AllocaInst &AI;
307#endif
308
309 /// The instruction responsible for this alloca not having a known set
310 /// of slices.
311 ///
312 /// When an instruction (potentially) escapes the pointer to the alloca, we
313 /// store a pointer to that here and abort trying to form slices of the
314 /// alloca. This will be null if the alloca slices are analyzed successfully.
315 Instruction *PointerEscapingInstr;
316
317 /// The slices of the alloca.
318 ///
319 /// We store a vector of the slices formed by uses of the alloca here. This
320 /// vector is sorted by increasing begin offset, and then the unsplittable
321 /// slices before the splittable ones. See the Slice inner class for more
322 /// details.
323 SmallVector<Slice, 8> Slices;
324
325 /// Instructions which will become dead if we rewrite the alloca.
326 ///
327 /// Note that these are not separated by slice. This is because we expect an
328 /// alloca to be completely rewritten or not rewritten at all. If rewritten,
329 /// all these instructions can simply be removed and replaced with poison as
330 /// they come from outside of the allocated space.
331 SmallVector<Instruction *, 8> DeadUsers;
332
333 /// Uses which will become dead if we can promote the alloca.
334 SmallVector<Use *, 8> DeadUseIfPromotable;
335
336 /// Operands which will become dead if we rewrite the alloca.
337 ///
338 /// These are operands that in their particular use can be replaced with
339 /// poison when we rewrite the alloca. These show up in out-of-bounds inputs
340 /// to PHI nodes and the like. They aren't entirely dead (there might be
341 /// a GEP back into the bounds using it elsewhere) and nor is the PHI, but we
342 /// want to swap this particular input for poison to simplify the use lists of
343 /// the alloca.
344 SmallVector<Use *, 8> DeadOperands;
345};
346
347/// A partition of the slices.
348///
349/// An ephemeral representation for a range of slices which can be viewed as
350/// a partition of the alloca. This range represents a span of the alloca's
351/// memory which cannot be split, and provides access to all of the slices
352/// overlapping some part of the partition.
353///
354/// Objects of this type are produced by traversing the alloca's slices, but
355/// are only ephemeral and not persistent.
356class llvm::sroa::Partition {
357private:
358 friend class AllocaSlices;
359 friend class AllocaSlices::partition_iterator;
360
361 using iterator = AllocaSlices::iterator;
362
363 /// The beginning and ending offsets of the alloca for this
364 /// partition.
365 uint64_t BeginOffset = 0, EndOffset = 0;
366
367 /// The start and end iterators of this partition.
368 iterator SI, SJ;
369
370 /// A collection of split slice tails overlapping the partition.
371 SmallVector<Slice *, 4> SplitTails;
372
373 /// Raw constructor builds an empty partition starting and ending at
374 /// the given iterator.
375 Partition(iterator SI) : SI(SI), SJ(SI) {}
376
377public:
378 /// The start offset of this partition.
379 ///
380 /// All of the contained slices start at or after this offset.
381 uint64_t beginOffset() const { return BeginOffset; }
382
383 /// The end offset of this partition.
384 ///
385 /// All of the contained slices end at or before this offset.
386 uint64_t endOffset() const { return EndOffset; }
387
388 /// The size of the partition.
389 ///
390 /// Note that this can never be zero.
391 uint64_t size() const {
391 assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
393 return EndOffset - BeginOffset;
394 }
395
396 /// Test whether this partition contains no slices, and merely spans
397 /// a region occupied by split slices.
398 bool empty() const { return SI == SJ; }
399
400 /// \name Iterate slices that start within the partition.
401 /// These may be splittable or unsplittable. They have a begin offset >= the
402 /// partition begin offset.
403 /// @{
404 // FIXME: We should probably define a "concat_iterator" helper and use that
405 // to stitch together pointee_iterators over the split tails and the
406 // contiguous iterators of the partition. That would give a much nicer
407 // interface here. We could then additionally expose filtered iterators for
408 // split, unsplit, and unsplittable slices based on the usage patterns.
409 iterator begin() const { return SI; }
410 iterator end() const { return SJ; }
411 /// @}
412
413 /// Get the sequence of split slice tails.
414 ///
415 /// These tails are of slices which start before this partition but are
416 /// split and overlap into the partition. We accumulate these while forming
417 /// partitions.
418 ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
419};
420
421/// An iterator over partitions of the alloca's slices.
422///
423/// This iterator implements the core algorithm for partitioning the alloca's
424/// slices. It is a forward iterator as we don't support backtracking for
425/// efficiency reasons, and re-use a single storage area to maintain the
426/// current set of split slices.
427///
428/// It is templated on the slice iterator type to use so that it can operate
429/// with either const or non-const slice iterators.
430class AllocaSlices::partition_iterator
431 : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
432 Partition> {
433 friend class AllocaSlices;
434
435 /// Most of the state for walking the partitions is held in a class
436 /// with a nice interface for examining them.
437 Partition P;
438
439 /// We need to keep the end of the slices to know when to stop.
440 AllocaSlices::iterator SE;
441
442 /// We also need to keep track of the maximum split end offset seen.
443 /// FIXME: Do we really?
444 uint64_t MaxSplitSliceEndOffset = 0;
445
446 /// Sets the partition to be empty at given iterator, and sets the
447 /// end iterator.
448 partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
449 : P(SI), SE(SE) {
450 // If not already at the end, advance our state to form the initial
451 // partition.
452 if (SI != SE)
453 advance();
454 }
455
456 /// Advance the iterator to the next partition.
457 ///
458 /// Requires that the iterator not be at the end of the slices.
459 void advance() {
460 assert((P.SI != SE || !P.SplitTails.empty()) &&
461 "Cannot advance past the end of the slices!");
462
463 // Clear out any split uses which have ended.
464 if (!P.SplitTails.empty()) {
465 if (P.EndOffset >= MaxSplitSliceEndOffset) {
466 // If we've finished all splits, this is easy.
467 P.SplitTails.clear();
468 MaxSplitSliceEndOffset = 0;
469 } else {
470 // Remove the uses which have ended in the prior partition. This
471 // cannot change the max split slice end because we just checked that
472 // the prior partition ended prior to that max.
473 llvm::erase_if(P.SplitTails,
474 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
475 assert(llvm::any_of(P.SplitTails,
476 [&](Slice *S) {
477 return S->endOffset() == MaxSplitSliceEndOffset;
478 }) &&
479 "Could not find the current max split slice offset!");
480 assert(llvm::all_of(P.SplitTails,
481 [&](Slice *S) {
482 return S->endOffset() <= MaxSplitSliceEndOffset;
483 }) &&
484 "Max split slice end offset is not actually the max!");
485 }
486 }
487
488 // If P.SI is already at the end, then we've cleared the split tail and
489 // now have an end iterator.
490 if (P.SI == SE) {
491 assert(P.SplitTails.empty() && "Failed to clear the split slices!");
492 return;
493 }
494
495 // If we had a non-empty partition previously, set up the state for
496 // subsequent partitions.
497 if (P.SI != P.SJ) {
498 // Accumulate all the splittable slices which started in the old
499 // partition into the split list.
500 for (Slice &S : P)
501 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
502 P.SplitTails.push_back(&S);
503 MaxSplitSliceEndOffset =
504 std::max(S.endOffset(), MaxSplitSliceEndOffset);
505 }
506
507 // Start from the end of the previous partition.
508 P.SI = P.SJ;
509
510 // If P.SI is now at the end, we at most have a tail of split slices.
511 if (P.SI == SE) {
512 P.BeginOffset = P.EndOffset;
513 P.EndOffset = MaxSplitSliceEndOffset;
514 return;
515 }
516
517 // If we have split slices and the next slice is after a gap and is
518 // not splittable, immediately form an empty partition for the split
519 // slices up until the next slice begins.
520 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
521 !P.SI->isSplittable()) {
522 P.BeginOffset = P.EndOffset;
523 P.EndOffset = P.SI->beginOffset();
524 return;
525 }
526 }
527
528 // OK, we need to consume new slices. Set the end offset based on the
529 // current slice, and step SJ past it. The beginning offset of the
530 // partition is the beginning offset of the next slice unless we have
531 // pre-existing split slices that are continuing, in which case we begin
532 // at the prior end offset.
533 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
534 P.EndOffset = P.SI->endOffset();
535 ++P.SJ;
536
537 // There are two strategies to form a partition based on whether the
538 // partition starts with an unsplittable slice or a splittable slice.
539 if (!P.SI->isSplittable()) {
540 // When we're forming an unsplittable region, it must always start at
541 // the first slice and will extend through its end.
542 assert(P.BeginOffset == P.SI->beginOffset());
543
544 // Form a partition including all of the overlapping slices with this
545 // unsplittable slice.
546 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
547 if (!P.SJ->isSplittable())
548 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
549 ++P.SJ;
550 }
551
552 // We have a partition across a set of overlapping unsplittable
553 // partitions.
554 return;
555 }
556
557 // If we're starting with a splittable slice, then we need to form
558 // a synthetic partition spanning it and any other overlapping splittable
559 // slices.
560 assert(P.SI->isSplittable() && "Forming a splittable partition!");
561
562 // Collect all of the overlapping splittable slices.
563 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
564 P.SJ->isSplittable()) {
565 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
566 ++P.SJ;
567 }
568
569 // Back up P.EndOffset if we ended the span early when encountering an
570 // unsplittable slice. This synthesizes the early end offset of
571 // a partition spanning only splittable slices.
572 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
573 assert(!P.SJ->isSplittable());
574 P.EndOffset = P.SJ->beginOffset();
575 }
576 }
577
578public:
579 bool operator==(const partition_iterator &RHS) const {
580 assert(SE == RHS.SE &&
581 "End iterators don't match between compared partition iterators!");
582
583 // The observed positions of partitions is marked by the P.SI iterator and
584 // the emptiness of the split slices. The latter is only relevant when
585 // P.SI == SE, as the end iterator will additionally have an empty split
586 // slices list, but the prior may have the same P.SI and a tail of split
587 // slices.
588 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
589 assert(P.SJ == RHS.P.SJ &&
590 "Same set of slices formed two different sized partitions!");
591 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
592 "Same slice position with differently sized non-empty split "
593 "slice tails!");
594 return true;
595 }
596 return false;
597 }
598
599 partition_iterator &operator++() {
600 advance();
601 return *this;
602 }
603
604 Partition &operator*() { return P; }
605};
606
607/// A forward range over the partitions of the alloca's slices.
608///
609/// This accesses an iterator range over the partitions of the alloca's
610/// slices. It computes these partitions on the fly based on the overlapping
611/// offsets of the slices and the ability to split them. It will visit "empty"
612/// partitions to cover regions of the alloca only accessed via split
613/// slices.
614iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
615 return make_range(partition_iterator(begin(), end()),
616 partition_iterator(end(), end()));
617}
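
As a usage sketch, the range returned by partitions() is meant to be consumed with a range-based for loop; each Partition exposes beginOffset(), endOffset(), empty(), and splitSliceTails(), all defined earlier in this file. The helper below is hypothetical and for illustration only:

    static uint64_t countNonEmptyPartitions(AllocaSlices &AS) {
      uint64_t N = 0;
      for (Partition &P : AS.partitions())
        if (!P.empty() || !P.splitSliceTails().empty())
          ++N;
      return N;
    }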
618
619static Value *foldSelectInst(SelectInst &SI) {
620 // If the condition being selected on is a constant or the same value is
621 // being selected between, fold the select. Yes this does (rarely) happen
622 // early on.
623 if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
624 return SI.getOperand(1 + CI->isZero());
625 if (SI.getOperand(1) == SI.getOperand(2))
626 return SI.getOperand(1);
627
628 return nullptr;
629}
630
631/// A helper that folds a PHI node or a select.
632static Value *foldPHINodeOrSelectInst(Instruction &I) {
633 if (PHINode *PN = dyn_cast<PHINode>(&I)) {
634 // If PN merges together the same value, return that value.
635 return PN->hasConstantValue();
636 }
637 return foldSelectInst(cast<SelectInst>(I));
638}
639
640/// Builder for the alloca slices.
641///
642/// This class builds a set of alloca slices by recursively visiting the uses
643/// of an alloca and making a slice for each load and store at each offset.
644class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
645 friend class PtrUseVisitor<SliceBuilder>;
646 friend class InstVisitor<SliceBuilder>;
647
648 using Base = PtrUseVisitor<SliceBuilder>;
649
650 const uint64_t AllocSize;
651 AllocaSlices &AS;
652
653 SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
654 SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
655
656 /// Set to de-duplicate dead instructions found in the use walk.
657 SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
658
659public:
660 SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
661 : PtrUseVisitor<SliceBuilder>(DL),
662 AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedSize()),
663 AS(AS) {}
664
665private:
666 void markAsDead(Instruction &I) {
667 if (VisitedDeadInsts.insert(&I).second)
668 AS.DeadUsers.push_back(&I);
669 }
670
671 void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
672 bool IsSplittable = false) {
673 // Completely skip uses which have a zero size or start either before or
674 // past the end of the allocation.
675 if (Size == 0 || Offset.uge(AllocSize)) {
676 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
677 << Offset
678 << " which has zero size or starts outside of the "
679 << AllocSize << " byte alloca:\n"
680 << " alloca: " << AS.AI << "\n"
681 << " use: " << I << "\n");
682 return markAsDead(I);
683 }
684
685 uint64_t BeginOffset = Offset.getZExtValue();
686 uint64_t EndOffset = BeginOffset + Size;
687
688 // Clamp the end offset to the end of the allocation. Note that this is
689 // formulated to handle even the case where "BeginOffset + Size" overflows.
690 // This may appear superficially to be something we could ignore entirely,
691 // but that is not so! There may be widened loads or PHI-node uses where
692 // some instructions are dead but not others. We can't completely ignore
693 // them, and so have to record at least the information here.
694 assert(AllocSize >= BeginOffset); // Established above.
695 if (Size > AllocSize - BeginOffset) {
696 LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
697 << Offset << " to remain within the " << AllocSize
698 << " byte alloca:\n"
699 << " alloca: " << AS.AI << "\n"
700 << " use: " << I << "\n");
701 EndOffset = AllocSize;
702 }
703
704 AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
705 }
706
707 void visitBitCastInst(BitCastInst &BC) {
708 if (BC.use_empty())
709 return markAsDead(BC);
710
711 return Base::visitBitCastInst(BC);
712 }
713
714 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
715 if (ASC.use_empty())
716 return markAsDead(ASC);
717
718 return Base::visitAddrSpaceCastInst(ASC);
719 }
720
721 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
722 if (GEPI.use_empty())
723 return markAsDead(GEPI);
724
725 if (SROAStrictInbounds && GEPI.isInBounds()) {
726 // FIXME: This is a manually un-factored variant of the basic code inside
727 // of GEPs with checking of the inbounds invariant specified in the
728 // langref in a very strict sense. If we ever want to enable
729 // SROAStrictInbounds, this code should be factored cleanly into
730 // PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
731 // by writing out the code here where we have the underlying allocation
732 // size readily available.
733 APInt GEPOffset = Offset;
734 const DataLayout &DL = GEPI.getModule()->getDataLayout();
735 for (gep_type_iterator GTI = gep_type_begin(GEPI),
736 GTE = gep_type_end(GEPI);
737 GTI != GTE; ++GTI) {
738 ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
739 if (!OpC)
740 break;
741
742 // Handle a struct index, which adds its field offset to the pointer.
743 if (StructType *STy = GTI.getStructTypeOrNull()) {
744 unsigned ElementIdx = OpC->getZExtValue();
745 const StructLayout *SL = DL.getStructLayout(STy);
746 GEPOffset +=
747 APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
748 } else {
749 // For array or vector indices, scale the index by the size of the
750 // type.
751 APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
752 GEPOffset +=
753 Index *
754 APInt(Offset.getBitWidth(),
755 DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize());
756 }
757
758 // If this index has computed an intermediate pointer which is not
759 // inbounds, then the result of the GEP is a poison value and we can
760 // delete it and all uses.
761 if (GEPOffset.ugt(AllocSize))
762 return markAsDead(GEPI);
763 }
764 }
765
766 return Base::visitGetElementPtrInst(GEPI);
767 }
768
769 void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
770 uint64_t Size, bool IsVolatile) {
771 // We allow splitting of non-volatile loads and stores where the type is an
772 // integer type. These may be used to implement 'memcpy' or other "transfer
773 // of bits" patterns.
774 bool IsSplittable =
775 Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);
776
777 insertUse(I, Offset, Size, IsSplittable);
778 }
779
780 void visitLoadInst(LoadInst &LI) {
781 assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
782 "All simple FCA loads should have been pre-split");
783
784 if (!IsOffsetKnown)
785 return PI.setAborted(&LI);
786
787 if (isa<ScalableVectorType>(LI.getType()))
788 return PI.setAborted(&LI);
789
790 uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedSize();
791 return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
792 }
793
794 void visitStoreInst(StoreInst &SI) {
795 Value *ValOp = SI.getValueOperand();
796 if (ValOp == *U)
797 return PI.setEscapedAndAborted(&SI);
798 if (!IsOffsetKnown)
799 return PI.setAborted(&SI);
800
801 if (isa<ScalableVectorType>(ValOp->getType()))
802 return PI.setAborted(&SI);
803
804 uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize();
805
806 // If this memory access can be shown to *statically* extend outside the
807 // bounds of the allocation, its behavior is undefined, so simply
808 // ignore it. Note that this is more strict than the generic clamping
809 // behavior of insertUse. We also try to handle cases which might run the
810 // risk of overflow.
811 // FIXME: We should instead consider the pointer to have escaped if this
812 // function is being instrumented for addressing bugs or race conditions.
813 if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
814 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
815 << Offset << " which extends past the end of the "
816 << AllocSize << " byte alloca:\n"
817 << " alloca: " << AS.AI << "\n"
818 << " use: " << SI << "\n");
819 return markAsDead(SI);
820 }
821
822 assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
823 "All simple FCA stores should have been pre-split");
824 handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
825 }
826
827 void visitMemSetInst(MemSetInst &II) {
828 assert(II.getRawDest() == *U && "Pointer use is not the destination?");
829 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
830 if ((Length && Length->getValue() == 0) ||
831 (IsOffsetKnown && Offset.uge(AllocSize)))
832 // Zero-length mem transfer intrinsics can be ignored entirely.
833 return markAsDead(II);
834
835 if (!IsOffsetKnown)
836 return PI.setAborted(&II);
837
838 insertUse(II, Offset, Length ? Length->getLimitedValue()
839 : AllocSize - Offset.getLimitedValue(),
840 (bool)Length);
841 }
842
843 void visitMemTransferInst(MemTransferInst &II) {
844 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
845 if (Length && Length->getValue() == 0)
846 // Zero-length mem transfer intrinsics can be ignored entirely.
847 return markAsDead(II);
848
849 // Because we can visit these intrinsics twice, also check to see if the
850 // first time marked this instruction as dead. If so, skip it.
851 if (VisitedDeadInsts.count(&II))
852 return;
853
854 if (!IsOffsetKnown)
855 return PI.setAborted(&II);
856
857 // This side of the transfer is completely out-of-bounds, and so we can
858 // nuke the entire transfer. However, we also need to nuke the other side
859 // if already added to our partitions.
860 // FIXME: Yet another place we really should bypass this when
861 // instrumenting for ASan.
862 if (Offset.uge(AllocSize)) {
863 SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
864 MemTransferSliceMap.find(&II);
865 if (MTPI != MemTransferSliceMap.end())
866 AS.Slices[MTPI->second].kill();
867 return markAsDead(II);
868 }
869
870 uint64_t RawOffset = Offset.getLimitedValue();
871 uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
872
873 // Check for the special case where the same exact value is used for both
874 // source and dest.
875 if (*U == II.getRawDest() && *U == II.getRawSource()) {
876 // For non-volatile transfers this is a no-op.
877 if (!II.isVolatile())
878 return markAsDead(II);
879
880 return insertUse(II, Offset, Size, /*IsSplittable=*/false);
881 }
882
883 // If we have seen both source and destination for a mem transfer, then
884 // they both point to the same alloca.
885 bool Inserted;
886 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
887 std::tie(MTPI, Inserted) =
888 MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
889 unsigned PrevIdx = MTPI->second;
890 if (!Inserted) {
891 Slice &PrevP = AS.Slices[PrevIdx];
892
893 // Check if the begin offsets match and this is a non-volatile transfer.
894 // In that case, we can completely elide the transfer.
895 if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
896 PrevP.kill();
897 return markAsDead(II);
898 }
899
900 // Otherwise we have an offset transfer within the same alloca. We can't
901 // split those.
902 PrevP.makeUnsplittable();
903 }
904
905 // Insert the use now that we've fixed up the splittable nature.
906 insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
907
908 // Check that we ended up with a valid index in the map.
909 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
910 "Map index doesn't point back to a slice with this user.");
911 }
912
913 // Disable SRoA for any intrinsics except for lifetime invariants and
914 // invariant group.
915 // FIXME: What about debug intrinsics? This matches old behavior, but
916 // doesn't make sense.
917 void visitIntrinsicInst(IntrinsicInst &II) {
918 if (II.isDroppable()) {
919 AS.DeadUseIfPromotable.push_back(U);
920 return;
921 }
922
923 if (!IsOffsetKnown)
924 return PI.setAborted(&II);
925
926 if (II.isLifetimeStartOrEnd()) {
927 ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
928 uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
929 Length->getLimitedValue());
930 insertUse(II, Offset, Size, true);
931 return;
932 }
933
934 if (II.isLaunderOrStripInvariantGroup()) {
935 enqueueUsers(II);
936 return;
937 }
938
939 Base::visitIntrinsicInst(II);
940 }
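// For illustration, a minimal sketch (hypothetical IR, not taken from this
// file) of how the code above slices a lifetime intrinsic. Assuming a
// 16-byte alloca visited at offset 0:
//
//   %a = alloca [16 x i8]
//   call void @llvm.lifetime.start.p0(i64 16, ptr %a)
//
// Size = min(AllocSize - Offset, Length) = min(16 - 0, 16) = 16, so a
// splittable slice covering [0,16) is recorded for the intrinsic.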
941
942 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
943 // We consider any PHI or select that results in a direct load or store of
944 // the same offset to be a viable use for slicing purposes. These uses
945 // are considered unsplittable and the size is the maximum loaded or stored
946 // size.
947 SmallPtrSet<Instruction *, 4> Visited;
948 SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
949 Visited.insert(Root);
950 Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
951 const DataLayout &DL = Root->getModule()->getDataLayout();
952 // If there are no loads or stores, the access is dead. We mark that as
953 // a size zero access.
954 Size = 0;
955 do {
956 Instruction *I, *UsedI;
957 std::tie(UsedI, I) = Uses.pop_back_val();
958
959 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
960 Size = std::max(Size,
961 DL.getTypeStoreSize(LI->getType()).getFixedSize());
962 continue;
963 }
964 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
965 Value *Op = SI->getOperand(0);
966 if (Op == UsedI)
967 return SI;
968 Size = std::max(Size,
969 DL.getTypeStoreSize(Op->getType()).getFixedSize());
970 continue;
971 }
972
973 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
974 if (!GEP->hasAllZeroIndices())
975 return GEP;
976 } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
977 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
978 return I;
979 }
980
981 for (User *U : I->users())
982 if (Visited.insert(cast<Instruction>(U)).second)
983 Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
984 } while (!Uses.empty());
985
986 return nullptr;
987 }
988
989 void visitPHINodeOrSelectInst(Instruction &I) {
990     assert(isa<PHINode>(I) || isa<SelectInst>(I));
991 if (I.use_empty())
992 return markAsDead(I);
993
994 // If this is a PHI node before a catchswitch, we cannot insert any non-PHI
995 // instructions in this BB, which may be required during rewriting. Bail out
996 // on these cases.
997 if (isa<PHINode>(I) &&
998 I.getParent()->getFirstInsertionPt() == I.getParent()->end())
999 return PI.setAborted(&I);
1000
1001 // TODO: We could use simplifyInstruction here to fold PHINodes and
1002 // SelectInsts. However, doing so requires to change the current
1003 // dead-operand-tracking mechanism. For instance, suppose neither loading
1004 // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
1005 // trap either. However, if we simply replace %U with undef using the
1006 // current dead-operand-tracking mechanism, "load (select undef, undef,
1007 // %other)" may trap because the select may return the first operand
1008 // "undef".
1009 if (Value *Result = foldPHINodeOrSelectInst(I)) {
1010 if (Result == *U)
1011 // If the result of the constant fold will be the pointer, recurse
1012 // through the PHI/select as if we had RAUW'ed it.
1013 enqueueUsers(I);
1014 else
1015 // Otherwise the operand to the PHI/select is dead, and we can replace
1016 // it with poison.
1017 AS.DeadOperands.push_back(U);
1018
1019 return;
1020 }
1021
1022 if (!IsOffsetKnown)
1023 return PI.setAborted(&I);
1024
1025 // See if we already have computed info on this node.
1026 uint64_t &Size = PHIOrSelectSizes[&I];
1027 if (!Size) {
1028 // This is a new PHI/Select, check for an unsafe use of it.
1029 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
1030 return PI.setAborted(UnsafeI);
1031 }
1032
1033 // For PHI and select operands outside the alloca, we can't nuke the entire
1034 // phi or select -- the other side might still be relevant, so we special
1035 // case them here and use a separate structure to track the operands
1036 // themselves which should be replaced with poison.
1037 // FIXME: This should instead be escaped in the event we're instrumenting
1038 // for address sanitization.
1039 if (Offset.uge(AllocSize)) {
1040 AS.DeadOperands.push_back(U);
1041 return;
1042 }
1043
1044 insertUse(I, Offset, Size);
1045 }
1046
1047 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1048
1049 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1050
1051 /// Disable SROA entirely if there are unhandled users of the alloca.
1052 void visitInstruction(Instruction &I) { PI.setAborted(&I); }
1053};
1054
1055AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1056 :
1057#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1058 AI(AI),
1059#endif
1060 PointerEscapingInstr(nullptr) {
1061 SliceBuilder PB(DL, AI, *this);
1062 SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
1063 if (PtrI.isEscaped() || PtrI.isAborted()) {
1064 // FIXME: We should sink the escape vs. abort info into the caller nicely,
1065 // possibly by just storing the PtrInfo in the AllocaSlices.
1066 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1067 : PtrI.getAbortingInst();
1068    assert(PointerEscapingInstr && "Did not track a bad instruction");
1069 return;
1070 }
1071
1072 llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
1073
1074 // Sort the uses. This arranges for the offsets to be in ascending order,
1075 // and the sizes to be in descending order.
1076 llvm::stable_sort(Slices);
1077}
1078
1079#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1080
1081void AllocaSlices::print(raw_ostream &OS, const_iterator I,
1082 StringRef Indent) const {
1083 printSlice(OS, I, Indent);
1084 OS << "\n";
1085 printUse(OS, I, Indent);
1086}
1087
1088void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
1089 StringRef Indent) const {
1090 OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
1091 << " slice #" << (I - begin())
1092 << (I->isSplittable() ? " (splittable)" : "");
1093}
1094
1095void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
1096 StringRef Indent) const {
1097 OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
1098}
1099
1100void AllocaSlices::print(raw_ostream &OS) const {
1101 if (PointerEscapingInstr) {
1102 OS << "Can't analyze slices for alloca: " << AI << "\n"
1103 << " A pointer to this alloca escaped by:\n"
1104 << " " << *PointerEscapingInstr << "\n";
1105 return;
1106 }
1107
1108 OS << "Slices of alloca: " << AI << "\n";
1109 for (const_iterator I = begin(), E = end(); I != E; ++I)
1110 print(OS, I);
1111}
1112
1113 LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
1114 print(dbgs(), I);
1115}
1116 LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1117
1118#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1119
1120/// Walk the range of a partitioning looking for a common type to cover this
1121/// sequence of slices.
1122static std::pair<Type *, IntegerType *>
1123findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
1124 uint64_t EndOffset) {
1125 Type *Ty = nullptr;
1126 bool TyIsCommon = true;
1127 IntegerType *ITy = nullptr;
1128
1129 // Note that we need to look at *every* alloca slice's Use to ensure we
1130 // always get consistent results regardless of the order of slices.
1131 for (AllocaSlices::const_iterator I = B; I != E; ++I) {
1132 Use *U = I->getUse();
1133 if (isa<IntrinsicInst>(*U->getUser()))
1134 continue;
1135 if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1136 continue;
1137
1138 Type *UserTy = nullptr;
1139 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1140 UserTy = LI->getType();
1141 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1142 UserTy = SI->getValueOperand()->getType();
1143 }
1144
1145 if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1146 // If the type is larger than the partition, skip it. We only encounter
1147 // this for split integer operations where we want to use the type of the
1148 // entity causing the split. Also skip if the type is not a byte width
1149 // multiple.
1150 if (UserITy->getBitWidth() % 8 != 0 ||
1151 UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1152 continue;
1153
1154 // Track the largest bitwidth integer type used in this way in case there
1155 // is no common type.
1156 if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
1157 ITy = UserITy;
1158 }
1159
1160 // To avoid depending on the order of slices, Ty and TyIsCommon must not
1161 // depend on types skipped above.
1162 if (!UserTy || (Ty && Ty != UserTy))
1163 TyIsCommon = false; // Give up on anything but an iN type.
1164 else
1165 Ty = UserTy;
1166 }
1167
1168 return {TyIsCommon ? Ty : nullptr, ITy};
1169}
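// For illustration, a hypothetical partition [0,4) whose full-width uses are
//
//   %v = load i32, ptr %p
//   store float %f, ptr %p
//
// yields UserTy = i32 for the load and UserTy = float for the store. The
// types disagree, so TyIsCommon becomes false and the first element of the
// returned pair is null, while ITy still records i32 as the widest
// byte-width-multiple integer type seen, which the caller can fall back on
// for integer widening.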
1170
1171/// PHI instructions that use an alloca and are subsequently loaded can be
1172/// rewritten to load both input pointers in the pred blocks and then PHI the
1173/// results, allowing the load of the alloca to be promoted.
1174/// From this:
1175/// %P2 = phi [i32* %Alloca, i32* %Other]
1176/// %V = load i32* %P2
1177/// to:
1178/// %V1 = load i32* %Alloca -> will be mem2reg'd
1179/// ...
1180/// %V2 = load i32* %Other
1181/// ...
1182/// %V = phi [i32 %V1, i32 %V2]
1183///
1184/// We can do this to a select if its only uses are loads and if the operands
1185/// to the select can be loaded unconditionally.
1186///
1187/// FIXME: This should be hoisted into a generic utility, likely in
1188/// Transforms/Util/Local.h
1189static bool isSafePHIToSpeculate(PHINode &PN) {
1190 const DataLayout &DL = PN.getModule()->getDataLayout();
1191
1192 // For now, we can only do this promotion if the load is in the same block
1193 // as the PHI, and if there are no stores between the phi and load.
1194 // TODO: Allow recursive phi users.
1195 // TODO: Allow stores.
1196 BasicBlock *BB = PN.getParent();
1197 Align MaxAlign;
1198 uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
1199 Type *LoadType = nullptr;
1200 for (User *U : PN.users()) {
1201 LoadInst *LI = dyn_cast<LoadInst>(U);
1202 if (!LI || !LI->isSimple())
1203 return false;
1204
1205 // For now we only allow loads in the same block as the PHI. This is
1206 // a common case that happens when instcombine merges two loads through
1207 // a PHI.
1208 if (LI->getParent() != BB)
1209 return false;
1210
1211 if (LoadType) {
1212 if (LoadType != LI->getType())
1213 return false;
1214 } else {
1215 LoadType = LI->getType();
1216 }
1217
1218 // Ensure that there are no instructions between the PHI and the load that
1219 // could store.
1220 for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
1221 if (BBI->mayWriteToMemory())
1222 return false;
1223
1224 MaxAlign = std::max(MaxAlign, LI->getAlign());
1225 }
1226
1227 if (!LoadType)
1228 return false;
1229
1230 APInt LoadSize = APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedSize());
1231
1232 // We can only transform this if it is safe to push the loads into the
1233 // predecessor blocks. The only thing to watch out for is that we can't put
1234 // a possibly trapping load in the predecessor if it is a critical edge.
1235 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1236 Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
1237 Value *InVal = PN.getIncomingValue(Idx);
1238
1239 // If the value is produced by the terminator of the predecessor (an
1240 // invoke) or it has side-effects, there is no valid place to put a load
1241 // in the predecessor.
1242 if (TI == InVal || TI->mayHaveSideEffects())
1243 return false;
1244
1245 // If the predecessor has a single successor, then the edge isn't
1246 // critical.
1247 if (TI->getNumSuccessors() == 1)
1248 continue;
1249
1250 // If this pointer is always safe to load, or if we can prove that there
1251 // is already a load in the block, then we can move the load to the pred
1252 // block.
1253 if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
1254 continue;
1255
1256 return false;
1257 }
1258
1259 return true;
1260}
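// For illustration, a hypothetical case the check above rejects: a possible
// store between the PHI and its load.
//
//   bb:
//     %p = phi ptr [ %a, %pred1 ], [ %b, %pred2 ]
//     call void @clobber()      ; mayWriteToMemory(), so speculation is unsafe
//     %v = load i32, ptr %p
//
// Speculation is likewise rejected when a predecessor edge is critical and
// the incoming pointer cannot be shown safe to load unconditionally there.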
1261
1262static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
1263  LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
1264
1265 LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
1266 Type *LoadTy = SomeLoad->getType();
1267 IRB.SetInsertPoint(&PN);
1268 PHINode *NewPN = IRB.CreatePHI(LoadTy, PN.getNumIncomingValues(),
1269 PN.getName() + ".sroa.speculated");
1270
1271 // Get the AA tags and alignment to use from one of the loads. It does not
1272 // matter which one we get and if any differ.
1273 AAMDNodes AATags = SomeLoad->getAAMetadata();
1274 Align Alignment = SomeLoad->getAlign();
1275
1276 // Rewrite all loads of the PN to use the new PHI.
1277 while (!PN.use_empty()) {
1278 LoadInst *LI = cast<LoadInst>(PN.user_back());
1279 LI->replaceAllUsesWith(NewPN);
1280 LI->eraseFromParent();
1281 }
1282
1283 // Inject loads into all of the pred blocks.
1284 DenseMap<BasicBlock*, Value*> InjectedLoads;
1285 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1286 BasicBlock *Pred = PN.getIncomingBlock(Idx);
1287 Value *InVal = PN.getIncomingValue(Idx);
1288
1289 // A PHI node is allowed to have multiple (duplicated) entries for the same
1290 // basic block, as long as the value is the same. So if we already injected
1291 // a load in the predecessor, then we should reuse the same load for all
1292 // duplicated entries.
1293 if (Value* V = InjectedLoads.lookup(Pred)) {
1294 NewPN->addIncoming(V, Pred);
1295 continue;
1296 }
1297
1298 Instruction *TI = Pred->getTerminator();
1299 IRB.SetInsertPoint(TI);
1300
1301 LoadInst *Load = IRB.CreateAlignedLoad(
1302 LoadTy, InVal, Alignment,
1303 (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
1304 ++NumLoadsSpeculated;
1305 if (AATags)
1306 Load->setAAMetadata(AATags);
1307 NewPN->addIncoming(Load, Pred);
1308 InjectedLoads[Pred] = Load;
1309 }
1310
1311  LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
1312 PN.eraseFromParent();
1313}
1314
1315sroa::SelectHandSpeculativity &
1316sroa::SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
1317 if (isTrueVal)
1318 Bitfield::set<sroa::SelectHandSpeculativity::TrueVal>(Storage, true);
1319 else
1320 Bitfield::set<sroa::SelectHandSpeculativity::FalseVal>(Storage, true);
1321 return *this;
1322}
1323
1324bool sroa::SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
1325 return isTrueVal
1326 ? Bitfield::get<sroa::SelectHandSpeculativity::TrueVal>(Storage)
1327 : Bitfield::get<sroa::SelectHandSpeculativity::FalseVal>(Storage);
1328}
1329
1330bool sroa::SelectHandSpeculativity::areAllSpeculatable() const {
1331 return isSpeculatable(/*isTrueVal=*/true) &&
1332 isSpeculatable(/*isTrueVal=*/false);
1333}
1334
1335bool sroa::SelectHandSpeculativity::areAnySpeculatable() const {
1336 return isSpeculatable(/*isTrueVal=*/true) ||
1337 isSpeculatable(/*isTrueVal=*/false);
1338}
1339bool sroa::SelectHandSpeculativity::areNoneSpeculatable() const {
1340 return !areAnySpeculatable();
1341}
1342
1343static sroa::SelectHandSpeculativity
1344isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
1345  assert(LI.isSimple() && "Only for simple loads");
1346 sroa::SelectHandSpeculativity Spec;
1347
1348 const DataLayout &DL = SI.getModule()->getDataLayout();
1349 for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
1350 if (isSafeToLoadUnconditionally(Value, LI.getType(), LI.getAlign(), DL,
1351 &LI))
1352 Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue());
1353 else if (PreserveCFG)
1354 return Spec;
1355
1356 return Spec;
1357}
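// For illustration, a hypothetical select where only one hand is safe:
//
//   %p = select i1 %c, ptr %alloca, ptr %maybe_null
//   %v = load i32, ptr %p
//
// The true hand (%alloca) is dereferenceable (assuming the alloca is large
// enough for the load), so only it is marked speculatable. With PreserveCFG
// set, the helper returns as soon as a hand fails; otherwise the partial
// result lets the caller predicate the load via a CFG rewrite instead.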
1358
1359std::optional<sroa::RewriteableMemOps>
1360SROAPass::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
1361 RewriteableMemOps Ops;
1362
1363 for (User *U : SI.users()) {
1364 if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())
1365 U = *BC->user_begin();
1366
1367 if (auto *Store = dyn_cast<StoreInst>(U)) {
1368 // Note that atomic stores can be transformed; atomic semantics do not
1369 // have any meaning for a local alloca. Stores are not speculatable,
1370 // however, so if we can't turn it into a predicated store, we are done.
1371 if (Store->isVolatile() || PreserveCFG)
1372 return {}; // Give up on this `select`.
1373 Ops.emplace_back(Store);
1374 continue;
1375 }
1376
1377 auto *LI = dyn_cast<LoadInst>(U);
1378
1379 // Note that atomic loads can be transformed;
1380 // atomic semantics do not have any meaning for a local alloca.
1381 if (!LI || LI->isVolatile())
1382 return {}; // Give up on this `select`.
1383
1384 PossiblySpeculatableLoad Load(LI);
1385 if (!LI->isSimple()) {
1386 // If the `load` is not simple, we can't speculatively execute it,
1387 // but we could handle this via a CFG modification. But can we?
1388 if (PreserveCFG)
1389 return {}; // Give up on this `select`.
1390 Ops.emplace_back(Load);
1391 continue;
1392 }
1393
1394 sroa::SelectHandSpeculativity Spec =
1395 isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG);
1396 if (PreserveCFG && !Spec.areAllSpeculatable())
1397 return {}; // Give up on this `select`.
1398
1399 Load.setInt(Spec);
1400 Ops.emplace_back(Load);
1401 }
1402
1403 return Ops;
1404}
1405
1406static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
1407 IRBuilderTy &IRB) {
1408  LLVM_DEBUG(dbgs() << " original load: " << SI << "\n");
1409
1410 IRB.SetInsertPoint(&SI);
1411 Value *TV = SI.getTrueValue();
1412 Value *FV = SI.getFalseValue();
1413 // Replace the given load of the select with a select of two loads.
1414
1415  assert(LI.isSimple() && "We only speculate simple loads");
1416
1417 IRB.SetInsertPoint(&LI);
1418 LoadInst *TL =
1419 IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(),
1420 LI.getName() + ".sroa.speculate.load.true");
1421 LoadInst *FL =
1422 IRB.CreateAlignedLoad(LI.getType(), FV, LI.getAlign(),
1423 LI.getName() + ".sroa.speculate.load.false");
1424 NumLoadsSpeculated += 2;
1425
1426 // Transfer alignment and AA info if present.
1427 TL->setAlignment(LI.getAlign());
1428 FL->setAlignment(LI.getAlign());
1429
1430 AAMDNodes Tags = LI.getAAMetadata();
1431 if (Tags) {
1432 TL->setAAMetadata(Tags);
1433 FL->setAAMetadata(Tags);
1434 }
1435
1436 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1437 LI.getName() + ".sroa.speculated");
1438
1439  LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
1440 LI.replaceAllUsesWith(V);
1441}
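// For illustration, the rewrite performed above on hypothetical IR:
//
//   ; before
//   %p = select i1 %c, ptr %a, ptr %b
//   %v = load i32, ptr %p
//
//   ; after
//   %v.true  = load i32, ptr %a      ; "...sroa.speculate.load.true"
//   %v.false = load i32, ptr %b      ; "...sroa.speculate.load.false"
//   %v = select i1 %c, i32 %v.true, i32 %v.false
//
// Both new loads inherit the original load's alignment and AA metadata, and
// all uses of the original load are redirected to the new select.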
1442
1443template <typename T>
1444static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
1445 sroa::SelectHandSpeculativity Spec,
1446 DomTreeUpdater &DTU) {
1447  assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");
1448  LLVM_DEBUG(dbgs() << " original mem op: " << I << "\n");
1449 BasicBlock *Head = I.getParent();
1450 Instruction *ThenTerm = nullptr;
1451 Instruction *ElseTerm = nullptr;
1452 if (Spec.areNoneSpeculatable())
1453 SplitBlockAndInsertIfThenElse(SI.getCondition(), &I, &ThenTerm, &ElseTerm,
1454 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1455 else {
1456 SplitBlockAndInsertIfThen(SI.getCondition(), &I, /*Unreachable=*/false,
1457 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1458 /*LI=*/nullptr, /*ThenBlock=*/nullptr);
1459 if (Spec.isSpeculatable(/*isTrueVal=*/true))
1460 cast<BranchInst>(Head->getTerminator())->swapSuccessors();
1461 }
1462 auto *HeadBI = cast<BranchInst>(Head->getTerminator());
1463 Spec = {}; // Do not use `Spec` beyond this point.
1464 BasicBlock *Tail = I.getParent();
1465 Tail->setName(Head->getName() + ".cont");
1466 PHINode *PN;
1467 if (isa<LoadInst>(I))
1468 PN = PHINode::Create(I.getType(), 2, "", &I);
1469 for (BasicBlock *SuccBB : successors(Head)) {
1470 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1471 int SuccIdx = IsThen ? 0 : 1;
1472 auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
1473 if (NewMemOpBB != Head) {
1474 NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
1475 if (isa<LoadInst>(I))
1476 ++NumLoadsPredicated;
1477 else
1478 ++NumStoresPredicated;
1479 } else
1480 ++NumLoadsSpeculated;
1481 auto &CondMemOp = cast<T>(*I.clone());
1482 CondMemOp.insertBefore(NewMemOpBB->getTerminator());
1483 Value *Ptr = SI.getOperand(1 + SuccIdx);
1484 if (auto *PtrTy = Ptr->getType();
1485 !PtrTy->isOpaquePointerTy() &&
1486 PtrTy != CondMemOp.getPointerOperandType())
1487 Ptr = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
1488 Ptr, CondMemOp.getPointerOperandType(), "", &CondMemOp);
1489 CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
1490 if (isa<LoadInst>(I)) {
1491 CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
1492 PN->addIncoming(&CondMemOp, NewMemOpBB);
1493 } else
1494      LLVM_DEBUG(dbgs() << " to: " << CondMemOp << "\n");
1495 }
1496 if (isa<LoadInst>(I)) {
1497 PN->takeName(&I);
1498    LLVM_DEBUG(dbgs() << " to: " << *PN << "\n");
1499 I.replaceAllUsesWith(PN);
1500 }
1501}
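// For illustration, a hypothetical store of a select pointer where neither
// hand is speculatable. The block is split into an if-then-else diamond and
// the store is cloned into each side with the corresponding pointer:
//
//   ; before, in block %head
//   %p = select i1 %c, ptr %a, ptr %b
//   store i32 %v, ptr %p
//
//   ; after (the select itself is erased later by the caller)
//   head:       br i1 %c, label %head.then, label %head.else
//   head.then:  store i32 %v, ptr %a
//               br label %head.cont
//   head.else:  store i32 %v, ptr %b
//               br label %head.cont
//   head.cont:  ...
//
// For a load, each cloned load feeds a PHI in %head.cont that replaces the
// original instruction.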
1502
1503static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I,
1504 sroa::SelectHandSpeculativity Spec,
1505 DomTreeUpdater &DTU) {
1506 if (auto *LI = dyn_cast<LoadInst>(&I))
1507 rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU);
1508 else if (auto *SI = dyn_cast<StoreInst>(&I))
1509 rewriteMemOpOfSelect(SelInst, *SI, Spec, DTU);
1510 else
1511    llvm_unreachable("Only for load and store.");
1512}
1513
1514static bool rewriteSelectInstMemOps(SelectInst &SI,
1515 const sroa::RewriteableMemOps &Ops,
1516 IRBuilderTy &IRB, DomTreeUpdater *DTU) {
1517 bool CFGChanged = false;
1518  LLVM_DEBUG(dbgs() << " original select: " << SI << "\n");
1519
1520 for (const RewriteableMemOp &Op : Ops) {
1521 sroa::SelectHandSpeculativity Spec;
1522 Instruction *I;
1523 if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
1524 I = *US;
1525 } else {
1526 auto PSL = std::get<PossiblySpeculatableLoad>(Op);
1527 I = PSL.getPointer();
1528 Spec = PSL.getInt();
1529 }
1530 if (Spec.areAllSpeculatable()) {
1531 speculateSelectInstLoads(SI, cast<LoadInst>(*I), IRB);
1532 } else {
1533      assert(DTU && "Should not get here when not allowed to modify the CFG!");
1534 rewriteMemOpOfSelect(SI, *I, Spec, *DTU);
1535 CFGChanged = true;
1536 }
1537 I->eraseFromParent();
1538 }
1539
1540 for (User *U : make_early_inc_range(SI.users()))
1541 cast<BitCastInst>(U)->eraseFromParent();
1542 SI.eraseFromParent();
1543 return CFGChanged;
1544}
1545
1546/// Build a GEP out of a base pointer and indices.
1547///
1548/// This will return the BasePtr if that is valid, or build a new GEP
1549/// instruction using the IRBuilder if GEP-ing is needed.
1550static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
1551 SmallVectorImpl<Value *> &Indices,
1552 const Twine &NamePrefix) {
1553 if (Indices.empty())
1554 return BasePtr;
1555
1556 // A single zero index is a no-op, so check for this and avoid building a GEP
1557 // in that case.
1558 if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
1559 return BasePtr;
1560
1561 // buildGEP() is only called for non-opaque pointers.
1562 return IRB.CreateInBoundsGEP(
1563 BasePtr->getType()->getNonOpaquePointerElementType(), BasePtr, Indices,
1564 NamePrefix + "sroa_idx");
1565}
1566
1567/// Get a natural GEP off of the BasePtr walking through Ty toward
1568/// TargetTy without changing the offset of the pointer.
1569///
1570/// This routine assumes we've already established a properly offset GEP with
1571/// Indices, and arrived at the Ty type. The goal is to continue to GEP with
1572/// zero-indices down through type layers until we find one the same as
1573/// TargetTy. If we can't find one with the same type, we at least try to use
1574/// one with the same size. If none of that works, we just produce the GEP as
1575/// indicated by Indices to have the correct offset.
1576static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
1577 Value *BasePtr, Type *Ty, Type *TargetTy,
1578 SmallVectorImpl<Value *> &Indices,
1579 const Twine &NamePrefix) {
1580 if (Ty == TargetTy)
1581 return buildGEP(IRB, BasePtr, Indices, NamePrefix);
1582
1583 // Offset size to use for the indices.
1584 unsigned OffsetSize = DL.getIndexTypeSizeInBits(BasePtr->getType());
1585
1586 // See if we can descend into a struct and locate a field with the correct
1587 // type.
1588 unsigned NumLayers = 0;
1589 Type *ElementTy = Ty;
1590 do {
1591 if (ElementTy->isPointerTy())
1592 break;
1593
1594 if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
1595 ElementTy = ArrayTy->getElementType();
1596 Indices.push_back(IRB.getIntN(OffsetSize, 0));
1597 } else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
1598 ElementTy = VectorTy->getElementType();
1599 Indices.push_back(IRB.getInt32(0));
1600 } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
1601 if (STy->element_begin() == STy->element_end())
1602 break; // Nothing left to descend into.
1603 ElementTy = *STy->element_begin();
1604 Indices.push_back(IRB.getInt32(0));
1605 } else {
1606 break;
1607 }
1608 ++NumLayers;
1609 } while (ElementTy != TargetTy);
1610 if (ElementTy != TargetTy)
1611 Indices.erase(Indices.end() - NumLayers, Indices.end());
1612
1613 return buildGEP(IRB, BasePtr, Indices, NamePrefix);
1614}
1615
1616/// Get a natural GEP from a base pointer to a particular offset and
1617/// resulting in a particular type.
1618///
1619/// The goal is to produce a "natural" looking GEP that works with the existing
1620/// composite types to arrive at the appropriate offset and element type for
1621/// a pointer. TargetTy is the element type the returned GEP should point-to if
1622/// possible. We recurse by decreasing Offset, adding the appropriate index to
1623/// Indices, and setting Ty to the result subtype.
1624///
1625/// If no natural GEP can be constructed, this function returns null.
1626static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
1627 Value *Ptr, APInt Offset, Type *TargetTy,
1628 SmallVectorImpl<Value *> &Indices,
1629 const Twine &NamePrefix) {
1630 PointerType *Ty = cast<PointerType>(Ptr->getType());
1631
1632 // Don't consider any GEPs through an i8* as natural unless the TargetTy is
1633 // an i8.
1634 if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
1635 return nullptr;
1636
1637 Type *ElementTy = Ty->getNonOpaquePointerElementType();
1638 if (!ElementTy->isSized())
1639 return nullptr; // We can't GEP through an unsized element.
1640
1641 SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(ElementTy, Offset);
1642 if (Offset != 0)
1643 return nullptr;
1644
1645 for (const APInt &Index : IntIndices)
1646 Indices.push_back(IRB.getInt(Index));
1647 return getNaturalGEPWithType(IRB, DL, Ptr, ElementTy, TargetTy, Indices,
1648 NamePrefix);
1649}
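// For illustration, a hypothetical base pointer of type { i32, [4 x i16] }*
// with Offset == 6: getGEPIndicesForOffset produces the indices [0, 1, 1]
// and consumes the whole offset, so the helper can emit
//
//   %p = getelementptr inbounds { i32, [4 x i16] }, { i32, [4 x i16] }* %base, i64 0, i32 1, i64 1
//
// and then lets getNaturalGEPWithType descend further toward TargetTy. If any
// byte offset were left over, the function would return null instead.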
1650
1651/// Compute an adjusted pointer from Ptr by Offset bytes where the
1652/// resulting pointer has PointerTy.
1653///
1654/// This tries very hard to compute a "natural" GEP which arrives at the offset
1655/// and produces the pointer type desired. Where it cannot, it will try to use
1656/// the natural GEP to arrive at the offset and bitcast to the type. Where that
1657/// fails, it will try to use an existing i8* and GEP to the byte offset and
1658/// bitcast to the type.
1659///
1660/// The strategy for finding the more natural GEPs is to peel off layers of the
1661/// pointer, walking back through bit casts and GEPs, searching for a base
1662/// pointer from which we can compute a natural GEP with the desired
1663/// properties. The algorithm tries to fold as many constant indices into
1664/// a single GEP as possible, thus making each GEP more independent of the
1665/// surrounding code.
1666static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
1667 APInt Offset, Type *PointerTy,
1668 const Twine &NamePrefix) {
1669 // Create i8 GEP for opaque pointers.
1670 if (Ptr->getType()->isOpaquePointerTy()) {
1671 if (Offset != 0)
1672 Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(Offset),
1673 NamePrefix + "sroa_idx");
1674 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
1675 NamePrefix + "sroa_cast");
1676 }
1677
1678 // Even though we don't look through PHI nodes, we could be called on an
1679 // instruction in an unreachable block, which may be on a cycle.
1680 SmallPtrSet<Value *, 4> Visited;
1681 Visited.insert(Ptr);
1682 SmallVector<Value *, 4> Indices;
1683
1684 // We may end up computing an offset pointer that has the wrong type. If we
1685 // never are able to compute one directly that has the correct type, we'll
1686 // fall back to it, so keep it and the base it was computed from around here.
1687 Value *OffsetPtr = nullptr;
1688 Value *OffsetBasePtr;
1689
1690 // Remember any i8 pointer we come across to re-use if we need to do a raw
1691 // byte offset.
1692 Value *Int8Ptr = nullptr;
1693 APInt Int8PtrOffset(Offset.getBitWidth(), 0);
1694
1695 PointerType *TargetPtrTy = cast<PointerType>(PointerTy);
1696 Type *TargetTy = TargetPtrTy->getNonOpaquePointerElementType();
1697
1698  // Since `addrspacecast` is not necessarily a no-op, `Ptr` (the storage pointer)
1699  // may have a different address space from the expected `PointerTy` (the pointer
1700  // to be used). Adjust the pointer type based on the original storage pointer.
1701 auto AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
1702 PointerTy = TargetTy->getPointerTo(AS);
1703
1704 do {
1705 // First fold any existing GEPs into the offset.
1706 while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
1707 APInt GEPOffset(Offset.getBitWidth(), 0);
1708 if (!GEP->accumulateConstantOffset(DL, GEPOffset))
1709 break;
1710 Offset += GEPOffset;
1711 Ptr = GEP->getPointerOperand();
1712 if (!Visited.insert(Ptr).second)
1713 break;
1714 }
1715
1716 // See if we can perform a natural GEP here.
1717 Indices.clear();
1718 if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
1719 Indices, NamePrefix)) {
1720 // If we have a new natural pointer at the offset, clear out any old
1721 // offset pointer we computed. Unless it is the base pointer or
1722 // a non-instruction, we built a GEP we don't need. Zap it.
1723 if (OffsetPtr && OffsetPtr != OffsetBasePtr)
1724 if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) {
1725        assert(I->use_empty() && "Built a GEP with uses somehow!");
1726 I->eraseFromParent();
1727 }
1728 OffsetPtr = P;
1729 OffsetBasePtr = Ptr;
1730 // If we also found a pointer of the right type, we're done.
1731 if (P->getType() == PointerTy)
1732 break;
1733 }
1734
1735 // Stash this pointer if we've found an i8*.
1736 if (Ptr->getType()->isIntegerTy(8)) {
1737 Int8Ptr = Ptr;
1738 Int8PtrOffset = Offset;
1739 }
1740
1741 // Peel off a layer of the pointer and update the offset appropriately.
1742 if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
1743 Ptr = cast<Operator>(Ptr)->getOperand(0);
1744 } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
1745 if (GA->isInterposable())
1746 break;
1747 Ptr = GA->getAliasee();
1748 } else {
1749 break;
1750 }
1751    assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
1752 } while (Visited.insert(Ptr).second);
1753
1754 if (!OffsetPtr) {
1755 if (!Int8Ptr) {
1756 Int8Ptr = IRB.CreateBitCast(
1757 Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()),
1758 NamePrefix + "sroa_raw_cast");
1759 Int8PtrOffset = Offset;
1760 }
1761
1762 OffsetPtr = Int8PtrOffset == 0
1763 ? Int8Ptr
1764 : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
1765 IRB.getInt(Int8PtrOffset),
1766 NamePrefix + "sroa_raw_idx");
1767 }
1768 Ptr = OffsetPtr;
1769
1770 // On the off chance we were targeting i8*, guard the bitcast here.
1771 if (cast<PointerType>(Ptr->getType()) != TargetPtrTy) {
1772 Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
1773 TargetPtrTy,
1774 NamePrefix + "sroa_cast");
1775 }
1776
1777 return Ptr;
1778}
1779
1780/// Compute the adjusted alignment for a load or store from an offset.
1781static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) {
1782 return commonAlignment(getLoadStoreAlignment(I), Offset);
1783}
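// For illustration, commonAlignment keeps the largest power of two that
// divides both the original alignment and the byte offset: an access that
// was align 8, rewritten at byte offset 4 within its partition, becomes
// align 4, while the same access at offset 16 (or offset 0) keeps align 8.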
1784
1785/// Test whether we can convert a value from the old to the new type.
1786///
1787/// This predicate should be used to guard calls to convertValue in order to
1788/// ensure that we only try to convert viable values. The strategy is that we
1789/// will peel off single element struct and array wrappings to get to an
1790/// underlying value, and convert that value.
1791static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
1792 if (OldTy == NewTy)
1793 return true;
1794
1795 // For integer types, we can't handle any bit-width differences. This would
1796 // break both vector conversions with extension and introduce endianness
1797 // issues when in conjunction with loads and stores.
1798 if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1799    assert(cast<IntegerType>(OldTy)->getBitWidth() !=
1800           cast<IntegerType>(NewTy)->getBitWidth() &&
1801           "We can't have the same bitwidth for different int types");
1802 return false;
1803 }
1804
1805 if (DL.getTypeSizeInBits(NewTy).getFixedSize() !=
1806 DL.getTypeSizeInBits(OldTy).getFixedSize())
1807 return false;
1808 if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
1809 return false;
1810
1811 // We can convert pointers to integers and vice-versa. Same for vectors
1812 // of pointers and integers.
1813 OldTy = OldTy->getScalarType();
1814 NewTy = NewTy->getScalarType();
1815 if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
1816 if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
1817 unsigned OldAS = OldTy->getPointerAddressSpace();
1818 unsigned NewAS = NewTy->getPointerAddressSpace();
1819 // Convert pointers if they are pointers from the same address space or
1820 // different integral (not non-integral) address spaces with the same
1821 // pointer size.
1822 return OldAS == NewAS ||
1823 (!DL.isNonIntegralAddressSpace(OldAS) &&
1824 !DL.isNonIntegralAddressSpace(NewAS) &&
1825 DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
1826 }
1827
1828 // We can convert integers to integral pointers, but not to non-integral
1829 // pointers.
1830 if (OldTy->isIntegerTy())
1831 return !DL.isNonIntegralPointerType(NewTy);
1832
1833 // We can convert integral pointers to integers, but non-integral pointers
1834 // need to remain pointers.
1835 if (!DL.isNonIntegralPointerType(OldTy))
1836 return NewTy->isIntegerTy();
1837
1838 return false;
1839 }
1840
1841 return true;
1842}
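// For illustration, assuming a DataLayout with 64-bit pointers in integral
// address spaces, the predicate behaves roughly as follows:
//
//   canConvertValue(DL, i64,        ptr)  -> true   (inttoptr is possible)
//   canConvertValue(DL, ptr,        i64)  -> true   (ptrtoint is possible)
//   canConvertValue(DL, i32,        i64)  -> false  (integer widths differ)
//   canConvertValue(DL, <2 x i32>,  i64)  -> true   (same size, single value type)
//   canConvertValue(DL, {i32, i32}, i64)  -> false  (aggregate, not a single value type)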
1843
1844/// Generic routine to convert an SSA value to a value of a different
1845/// type.
1846///
1847/// This will try various different casting techniques, such as bitcasts,
1848/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
1849/// two types for viability with this routine.
1850static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
1851 Type *NewTy) {
1852 Type *OldTy = V->getType();
1853  assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertible to type");
1854
1855 if (OldTy == NewTy)
1856 return V;
1857
1858  assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
1859         "Integer types must be the exact same to convert.");
1860
1861 // See if we need inttoptr for this type pair. May require additional bitcast.
1862 if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
1863 // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
1864 // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
1865 // Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*>
1866 // Directly handle i64 to i8*
1867 return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
1868 NewTy);
1869 }
1870
1871 // See if we need ptrtoint for this type pair. May require additional bitcast.
1872 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
1873 // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
1874 // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
1875 // Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32>
1876 // Expand i8* to i64 --> i8* to i64 to i64
1877 return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
1878 NewTy);
1879 }
1880
1881 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
1882 unsigned OldAS = OldTy->getPointerAddressSpace();
1883 unsigned NewAS = NewTy->getPointerAddressSpace();
1884    // To convert pointers with different address spaces (already checked to be
1885    // convertible, i.e. they have the same pointer size), we cannot use
1886    // `bitcast` (which requires the same address space) or `addrspacecast`
1887    // (which is not always a no-op cast). Instead, use a pair of no-op
1888    // `ptrtoint`/`inttoptr` casts through an integer with the same bit
1889    // size.
1890 if (OldAS != NewAS) {
1891      assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
1892 return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
1893 NewTy);
1894 }
1895 }
1896
1897 return IRB.CreateBitCast(V, NewTy);
1898}
1899
1900/// Test whether the given slice use can be promoted to a vector.
1901///
1902/// This function is called to test each entry in a partition which is slated
1903/// for a single slice.
1904static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
1905 VectorType *Ty,
1906 uint64_t ElementSize,
1907 const DataLayout &DL) {
1908 // First validate the slice offsets.
1909 uint64_t BeginOffset =
1910 std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
1911 uint64_t BeginIndex = BeginOffset / ElementSize;
1912 if (BeginIndex * ElementSize != BeginOffset ||
1913 BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
1914 return false;
1915 uint64_t EndOffset =
1916 std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
1917 uint64_t EndIndex = EndOffset / ElementSize;
1918 if (EndIndex * ElementSize != EndOffset ||
1919 EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
1920 return false;
1921
1922  assert(EndIndex > BeginIndex && "Empty vector!");
1923 uint64_t NumElements = EndIndex - BeginIndex;
1924 Type *SliceTy = (NumElements == 1)
1925 ? Ty->getElementType()
1926 : FixedVectorType::get(Ty->getElementType(), NumElements);
1927
1928 Type *SplitIntTy =
1929 Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
1930
1931 Use *U = S.getUse();
1932
1933 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
1934 if (MI->isVolatile())
1935 return false;
1936 if (!S.isSplittable())
1937 return false; // Skip any unsplittable intrinsics.
1938 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
1939 if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
1940 return false;
1941 } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1942 if (LI->isVolatile())
1943 return false;
1944 Type *LTy = LI->getType();
1945 // Disable vector promotion when there are loads or stores of an FCA.
1946 if (LTy->isStructTy())
1947 return false;
1948 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
1949      assert(LTy->isIntegerTy());
1950 LTy = SplitIntTy;
1951 }
1952 if (!canConvertValue(DL, SliceTy, LTy))
1953 return false;
1954 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1955 if (SI->isVolatile())
1956 return false;
1957 Type *STy = SI->getValueOperand()->getType();
1958 // Disable vector promotion when there are loads or stores of an FCA.
1959 if (STy->isStructTy())
1960 return false;
1961 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
1962      assert(STy->isIntegerTy());
1963 STy = SplitIntTy;
1964 }
1965 if (!canConvertValue(DL, STy, SliceTy))
1966 return false;
1967 } else {
1968 return false;
1969 }
1970
1971 return true;
1972}
1973
1974/// Test whether a vector type is viable for promotion.
1975///
1976/// This implements the necessary checking for \c isVectorPromotionViable over
1977/// all slices of the alloca for the given VectorType.
1978static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
1979 const DataLayout &DL) {
1980 uint64_t ElementSize =
1981 DL.getTypeSizeInBits(VTy->getElementType()).getFixedSize();
1982
1983 // While the definition of LLVM vectors is bitpacked, we don't support sizes
1984 // that aren't byte sized.
1985 if (ElementSize % 8)
1986 return false;
1987  assert((DL.getTypeSizeInBits(VTy).getFixedSize() % 8) == 0 &&
1988         "vector size not a multiple of element size?");
1989 ElementSize /= 8;
1990
1991 for (const Slice &S : P)
1992 if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL))
1993 return false;
1994
1995 for (const Slice *S : P.splitSliceTails())
1996 if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL))
1997 return false;
1998
1999 return true;
2000}
2001
2002/// Test whether the given alloca partitioning and range of slices can be
2003/// promoted to a vector.
2004///
2005/// This is a quick test to check whether we can rewrite a particular alloca
2006/// partition (and its newly formed alloca) into a vector alloca with only
2007/// whole-vector loads and stores such that it could be promoted to a vector
2008/// SSA value. We only can ensure this for a limited set of operations, and we
2009/// don't want to do the rewrites unless we are confident that the result will
2010/// be promotable, so we have an early test here.
2011static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
2012 // Collect the candidate types for vector-based promotion. Also track whether
2013 // we have different element types.
2014 SmallVector<VectorType *, 4> CandidateTys;
2015 Type *CommonEltTy = nullptr;
2016 VectorType *CommonVecPtrTy = nullptr;
2017 bool HaveVecPtrTy = false;
2018 bool HaveCommonEltTy = true;
2019 bool HaveCommonVecPtrTy = true;
2020 auto CheckCandidateType = [&](Type *Ty) {
2021 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2022      // If this vector differs in total size in bits from the earlier candidates, discard the candidate list and bail out.
2023 if (!CandidateTys.empty()) {
2024 VectorType *V = CandidateTys[0];
2025 if (DL.getTypeSizeInBits(VTy).getFixedSize() !=
2026 DL.getTypeSizeInBits(V).getFixedSize()) {
2027 CandidateTys.clear();
2028 return;
2029 }
2030 }
2031 CandidateTys.push_back(VTy);
2032 Type *EltTy = VTy->getElementType();
2033
2034 if (!CommonEltTy)
2035 CommonEltTy = EltTy;
2036 else if (CommonEltTy != EltTy)
2037 HaveCommonEltTy = false;
2038
2039 if (EltTy->isPointerTy()) {
2040 HaveVecPtrTy = true;
2041 if (!CommonVecPtrTy)
2042 CommonVecPtrTy = VTy;
2043 else if (CommonVecPtrTy != VTy)
2044 HaveCommonVecPtrTy = false;
2045 }
2046 }
2047 };
2048 // Consider any loads or stores that are the exact size of the slice.
2049 for (const Slice &S : P)
2050 if (S.beginOffset() == P.beginOffset() &&
2051 S.endOffset() == P.endOffset()) {
2052 if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2053 CheckCandidateType(LI->getType());
2054 else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2055 CheckCandidateType(SI->getValueOperand()->getType());
2056 }
2057
2058 // If we didn't find a vector type, nothing to do here.
2059 if (CandidateTys.empty())
2060 return nullptr;
2061
2062  // Pointer-ness is sticky: if we had a vector-of-pointers candidate type,
2063 // then we should choose it, not some other alternative.
2064 // But, we can't perform a no-op pointer address space change via bitcast,
2065 // so if we didn't have a common pointer element type, bail.
2066 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2067 return nullptr;
2068
2069 // Try to pick the "best" element type out of the choices.
2070 if (!HaveCommonEltTy && HaveVecPtrTy) {
2071 // If there was a pointer element type, there's really only one choice.
2072 CandidateTys.clear();
2073 CandidateTys.push_back(CommonVecPtrTy);
2074 } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2075 // Integer-ify vector types.
2076 for (VectorType *&VTy : CandidateTys) {
2077 if (!VTy->getElementType()->isIntegerTy())
2078 VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
2079 VTy->getContext(), VTy->getScalarSizeInBits())));
2080 }
2081
2082 // Rank the remaining candidate vector types. This is easy because we know
2083 // they're all integer vectors. We sort by ascending number of elements.
2084 auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2085 (void)DL;
2086      assert(DL.getTypeSizeInBits(RHSTy).getFixedSize() ==
2087             DL.getTypeSizeInBits(LHSTy).getFixedSize() &&
2088             "Cannot have vector types of different sizes!");
2089      assert(RHSTy->getElementType()->isIntegerTy() &&
2090             "All non-integer types eliminated!");
2091      assert(LHSTy->getElementType()->isIntegerTy() &&
2092             "All non-integer types eliminated!");
2093 return cast<FixedVectorType>(RHSTy)->getNumElements() <
2094 cast<FixedVectorType>(LHSTy)->getNumElements();
2095 };
2096 llvm::sort(CandidateTys, RankVectorTypes);
2097 CandidateTys.erase(
2098 std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
2099 CandidateTys.end());
2100 } else {
2101// The only way to have the same element type in every vector type is to
2102// have the same vector type. Check that and remove all but one.
2103#ifndef NDEBUG
2104 for (VectorType *VTy : CandidateTys) {
2105      assert(VTy->getElementType() == CommonEltTy &&
2106             "Unaccounted for element type!");
2107      assert(VTy == CandidateTys[0] &&
2108             "Different vector types with the same element type!");
2109 }
2110#endif
2111 CandidateTys.resize(1);
2112 }
2113
2114 // FIXME: hack. Do we have a named constant for this?
2115 // SDAG SDNode can't have more than 65535 operands.
2116 llvm::erase_if(CandidateTys, [](VectorType *VTy) {
2117 return cast<FixedVectorType>(VTy)->getNumElements() >
2118 std::numeric_limits<unsigned short>::max();
2119 });
2120
2121 for (VectorType *VTy : CandidateTys)
2122 if (checkVectorTypeForPromotion(P, VTy, DL))
2123 return VTy;
2124
2125 return nullptr;
2126}
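// For illustration, a hypothetical partition [0,16) accessed only by
//
//   %v = load <4 x i32>, ptr %a, align 16
//   store <4 x float> %f, ptr %a, align 16
//
// collects two 128-bit candidate types with different, non-pointer element
// types, so both are integer-ified to <4 x i32> and deduplicated; the
// remaining <4 x i32> passes the per-slice checks (the <4 x float> store is
// convertible because the sizes match) and is returned as the promotion type.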
2127
2128/// Test whether a slice of an alloca is valid for integer widening.
2129///
2130/// This implements the necessary checking for the \c isIntegerWideningViable
2131/// test below on a single slice of the alloca.
2132static bool isIntegerWideningViableForSlice(const Slice &S,
2133 uint64_t AllocBeginOffset,
2134 Type *AllocaTy,
2135 const DataLayout &DL,
2136 bool &WholeAllocaOp) {
2137 uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedSize();
2138
2139 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2140 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2141
2142 Use *U = S.getUse();
2143
2144  // Lifetime intrinsics operate over the whole alloca, whose size is usually
2145  // larger than other load/store slices (RelEnd > Size). But lifetime intrinsics
2146  // are always promotable and should not impact the promotability of the other
2147  // slices in the partition.
2148 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2149 if (II->isLifetimeStartOrEnd() || II->isDroppable())
2150 return true;
2151 }
2152
2153 // We can't reasonably handle cases where the load or store extends past
2154 // the end of the alloca's type and into its padding.
2155 if (RelEnd > Size)
2156 return false;
2157
2158 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2159 if (LI->isVolatile())
2160 return false;
2161 // We can't handle loads that extend past the allocated memory.
2162 if (DL.getTypeStoreSize(LI->getType()).getFixedSize() > Size)
2163 return false;
2164 // So far, AllocaSliceRewriter does not support widening split slice tails
2165 // in rewriteIntegerLoad.
2166 if (S.beginOffset() < AllocBeginOffset)
2167 return false;
2168 // Note that we don't count vector loads or stores as whole-alloca
2169 // operations which enable integer widening because we would prefer to use
2170 // vector widening instead.
2171 if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
2172 WholeAllocaOp = true;
2173 if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
2174 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedSize())
2175 return false;
2176 } else if (RelBegin != 0 || RelEnd != Size ||
2177 !canConvertValue(DL, AllocaTy, LI->getType())) {
2178 // Non-integer loads need to be convertible from the alloca type so that
2179 // they are promotable.
2180 return false;
2181 }
2182 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2183 Type *ValueTy = SI->getValueOperand()->getType();
2184 if (SI->isVolatile())
2185 return false;
2186 // We can't handle stores that extend past the allocated memory.
2187 if (DL.getTypeStoreSize(ValueTy).getFixedSize() > Size)
2188 return false;
2189 // So far, AllocaSliceRewriter does not support widening split slice tails
2190 // in rewriteIntegerStore.
2191 if (S.beginOffset() < AllocBeginOffset)
2192 return false;
2193 // Note that we don't count vector loads or stores as whole-alloca
2194 // operations which enable integer widening because we would prefer to use
2195 // vector widening instead.
2196 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
2197 WholeAllocaOp = true;
2198 if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2199 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedSize())
2200 return false;
2201 } else if (RelBegin != 0 || RelEnd != Size ||
2202 !canConvertValue(DL, ValueTy, AllocaTy)) {
2203 // Non-integer stores need to be convertible to the alloca type so that
2204 // they are promotable.
2205 return false;
2206 }
2207 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2208 if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
2209 return false;
2210 if (!S.isSplittable())
2211 return false; // Skip any unsplittable intrinsics.
2212 } else {
2213 return false;
2214 }
2215
2216 return true;
2217}
2218
2219/// Test whether the given alloca partition's integer operations can be
2220/// widened to promotable ones.
2221///
2222/// This is a quick test to check whether we can rewrite the integer loads and
2223/// stores to a particular alloca into wider loads and stores and be able to
2224/// promote the resulting alloca.
2225static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
2226 const DataLayout &DL) {
2227 uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedSize();
2228 // Don't create integer types larger than the maximum bitwidth.
2229 if (SizeInBits > IntegerType::MAX_INT_BITS)
2230 return false;
2231
2232 // Don't try to handle allocas with bit-padding.
2233 if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedSize())
2234 return false;
2235
2236 // We need to ensure that an integer type with the appropriate bitwidth can
2237 // be converted to the alloca type, whatever that is. We don't want to force
2238 // the alloca itself to have an integer type if there is a more suitable one.
2239 Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
2240 if (!canConvertValue(DL, AllocaTy, IntTy) ||
2241 !canConvertValue(DL, IntTy, AllocaTy))
2242 return false;
2243
2244 // While examining uses, we ensure that the alloca has a covering load or
2245 // store. We don't want to widen the integer operations only to fail to
2246 // promote due to some other unsplittable entry (which we may make splittable
2247 // later). However, if there are only splittable uses, go ahead and assume
2248 // that we cover the alloca.
2249 // FIXME: We shouldn't consider split slices that happen to start in the
2250 // partition here...
2251 bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
2252
2253 for (const Slice &S : P)
2254 if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
2255 WholeAllocaOp))
2256 return false;
2257
2258 for (const Slice *S : P.splitSliceTails())
2259 if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
2260 WholeAllocaOp))
2261 return false;
2262
2263 return WholeAllocaOp;
2264}
2265
2266static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2267 IntegerType *Ty, uint64_t Offset,
2268 const Twine &Name) {
2269 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2270 IntegerType *IntTy = cast<IntegerType>(V->getType());
2271 assert(DL.getTypeStoreSize(Ty).getFixedSize() + Offset <=
2272 DL.getTypeStoreSize(IntTy).getFixedSize() &&
2273 "Element extends past full value");
2274 uint64_t ShAmt = 8 * Offset;
2275 if (DL.isBigEndian())
2276 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedSize() -
2277 DL.getTypeStoreSize(Ty).getFixedSize() - Offset);
2278 if (ShAmt) {
2279 V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2280 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2281 }
2282 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2283 "Cannot extract to a larger integer!");
2284 if (Ty != IntTy) {
2285 V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2286 LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n");
2287 }
2288 return V;
2289}
2290
2291static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
2292 Value *V, uint64_t Offset, const Twine &Name) {
2293 IntegerType *IntTy = cast<IntegerType>(Old->getType());
2294 IntegerType *Ty = cast<IntegerType>(V->getType());
2295 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2296 "Cannot insert a larger integer!");
2297 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2298 if (Ty != IntTy) {
2299 V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2300 LLVM_DEBUG(dbgs() << " extended: " << *V << "\n");
2301 }
2302 assert(DL.getTypeStoreSize(Ty).getFixedSize() + Offset <=
2303 DL.getTypeStoreSize(IntTy).getFixedSize() &&
2304 "Element store outside of alloca store");
2305 uint64_t ShAmt = 8 * Offset;
2306 if (DL.isBigEndian())
2307 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedSize() -
2308 DL.getTypeStoreSize(Ty).getFixedSize() - Offset);
2309 if (ShAmt) {
2310 V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2311 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2312 }
2313
2314 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2315 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2316 Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2317 LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n");
2318 V = IRB.CreateOr(Old, V, Name + ".insert");
2319 LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n");
2320 }
2321 return V;
2322}
2323
2324static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
2325 unsigned EndIndex, const Twine &Name) {
2326 auto *VecTy = cast<FixedVectorType>(V->getType());
2327 unsigned NumElements = EndIndex - BeginIndex;
2328 assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2329
2330 if (NumElements == VecTy->getNumElements())
2331 return V;
2332
2333 if (NumElements == 1) {
2334 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2335 Name + ".extract");
2336 LLVM_DEBUG(dbgs() << " extract: " << *V << "\n");
2337 return V;
2338 }
2339
2340 auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
2341 V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
2342 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2343 return V;
2344}
2345
2346static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
2347 unsigned BeginIndex, const Twine &Name) {
2348 VectorType *VecTy = cast<VectorType>(Old->getType());
2349 assert(VecTy && "Can only insert a vector into a vector");
2350
2351 VectorType *Ty = dyn_cast<VectorType>(V->getType());
2352 if (!Ty) {
2353 // Single element to insert.
2354 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2355 Name + ".insert");
2356 LLVM_DEBUG(dbgs() << " insert: " << *V << "\n");
2357 return V;
2358 }
2359
2360 assert(cast<FixedVectorType>(Ty)->getNumElements() <=
2361 cast<FixedVectorType>(VecTy)->getNumElements() &&
2362 "Too many elements!");
2363 if (cast<FixedVectorType>(Ty)->getNumElements() ==
2364 cast<FixedVectorType>(VecTy)->getNumElements()) {
2365 assert(V->getType() == VecTy && "Vector type mismatch");
2366 return V;
2367 }
2368 unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();
2369
2370 // When inserting a smaller vector into the larger to store, we first
2371 // use a shuffle vector to widen it with undef elements, and then
2372 // a second shuffle vector to select between the loaded vector and the
2373 // incoming vector.
2374 SmallVector<int, 8> Mask;
2375 Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2376 for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2377 if (i >= BeginIndex && i < EndIndex)
2378 Mask.push_back(i - BeginIndex);
2379 else
2380 Mask.push_back(-1);
2381 V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
2382 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2383
2384 SmallVector<Constant *, 8> Mask2;
2385 Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2386 for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2387 Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2388
2389 V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
2390
2391 LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
2392 return V;
2393}
2394
2395/// Visitor to rewrite instructions using a particular slice of an alloca
2396/// to use a new alloca.
2397///
2398/// Also implements the rewriting to vector-based accesses when the partition
2399/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
2400/// lives here.
2401class llvm::sroa::AllocaSliceRewriter
2402 : public InstVisitor<AllocaSliceRewriter, bool> {
2403 // Befriend the base class so it can delegate to private visit methods.
2404 friend class InstVisitor<AllocaSliceRewriter, bool>;
2405
2406 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2407
2408 const DataLayout &DL;
2409 AllocaSlices &AS;
2410 SROAPass &Pass;
2411 AllocaInst &OldAI, &NewAI;
2412 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2413 Type *NewAllocaTy;
2414
2415 // This is a convenience and flag variable that will be null unless the new
2416 // alloca's integer operations should be widened to this integer type due to
2417 // passing isIntegerWideningViable above. If it is non-null, the desired
2418 // integer type will be stored here for easy access during rewriting.
2419 IntegerType *IntTy;
2420
2421 // If we are rewriting an alloca partition which can be written as pure
2422 // vector operations, we stash extra information here. When VecTy is
2423 // non-null, we have some strict guarantees about the rewritten alloca:
2424 // - The new alloca is exactly the size of the vector type here.
2425 // - The accesses all either map to the entire vector or to a single
2426 // element.
2427 // - The set of accessing instructions is only one of those handled above
2428 // in isVectorPromotionViable. Generally these are the same access kinds
2429 // which are promotable via mem2reg.
2430 VectorType *VecTy;
2431 Type *ElementTy;
2432 uint64_t ElementSize;
2433
2434 // The original offset of the slice currently being rewritten relative to
2435 // the original alloca.
2436 uint64_t BeginOffset = 0;
2437 uint64_t EndOffset = 0;
2438
2439 // The new offsets of the slice currently being rewritten relative to the
2440 // original alloca.
2441 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2442
2443 uint64_t SliceSize = 0;
2444 bool IsSplittable = false;
2445 bool IsSplit = false;
2446 Use *OldUse = nullptr;
2447 Instruction *OldPtr = nullptr;
2448
2449 // Track post-rewrite users which are PHI nodes and Selects.
2450 SmallSetVector<PHINode *, 8> &PHIUsers;
2451 SmallSetVector<SelectInst *, 8> &SelectUsers;
2452
2453 // Utility IR builder, whose name prefix is set up for each visited use, and
2454 // the insertion point is set to point to the user.
2455 IRBuilderTy IRB;
2456
2457 // Return the new alloca, addrspacecasted if required to avoid changing the
2458 // addrspace of a volatile access.
2459 Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
2460 if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace())
2461 return &NewAI;
2462
2463 Type *AccessTy = NewAI.getAllocatedType()->getPointerTo(AddrSpace);
2464 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2465 }
2466
2467public:
2468 AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROAPass &Pass,
2469 AllocaInst &OldAI, AllocaInst &NewAI,
2470 uint64_t NewAllocaBeginOffset,
2471 uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
2472 VectorType *PromotableVecTy,
2473 SmallSetVector<PHINode *, 8> &PHIUsers,
2474 SmallSetVector<SelectInst *, 8> &SelectUsers)
2475 : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2476 NewAllocaBeginOffset(NewAllocaBeginOffset),
2477 NewAllocaEndOffset(NewAllocaEndOffset),
2478 NewAllocaTy(NewAI.getAllocatedType()),
2479 IntTy(
2480 IsIntegerPromotable
2481 ? Type::getIntNTy(NewAI.getContext(),
2482 DL.getTypeSizeInBits(NewAI.getAllocatedType())
2483 .getFixedSize())
2484 : nullptr),
2485 VecTy(PromotableVecTy),
2486 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2487 ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8
2488 : 0),
2489 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2490 IRB(NewAI.getContext(), ConstantFolder()) {
2491 if (VecTy) {
2492 assert((DL.getTypeSizeInBits(ElementTy).getFixedSize() % 8) == 0 &&
2493 "Only multiple-of-8 sized vector elements are viable");
2494 ++NumVectorized;
2495 }
2496 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2497 }
2498
2499 bool visit(AllocaSlices::const_iterator I) {
2500 bool CanSROA = true;
2501 BeginOffset = I->beginOffset();
2502 EndOffset = I->endOffset();
2503 IsSplittable = I->isSplittable();
2504 IsSplit =
2505 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2506 LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
2507 LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
2508 LLVM_DEBUG(dbgs() << "\n");
2509
2510 // Compute the intersecting offset range.
2511 assert(BeginOffset < NewAllocaEndOffset);
2512 assert(EndOffset > NewAllocaBeginOffset);
2513 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2514 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2515
2516 SliceSize = NewEndOffset - NewBeginOffset;
2517
2518 OldUse = I->getUse();
2519 OldPtr = cast<Instruction>(OldUse->get());
2520
2521 Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2522 IRB.SetInsertPoint(OldUserI);
2523 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2524 IRB.getInserter().SetNamePrefix(
2525 Twine(NewAI.getName()) + "." + Twine(BeginOffset) + ".");
2526
2527 CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
2528 if (VecTy || IntTy)
2529 assert(CanSROA);
2530 return CanSROA;
2531 }
2532
2533private:
2534 // Make sure the other visit overloads are visible.
2535 using Base::visit;
2536
2537 // Every instruction which can end up as a user must have a rewrite rule.
2538 bool visitInstruction(Instruction &I) {
2539 LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
2540 llvm_unreachable("No rewrite rule for this instruction!");
2541 }
2542
2543 Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
2544 // Note that the offset computation can use BeginOffset or NewBeginOffset
2545 // interchangeably for unsplit slices.
2546 assert(IsSplit || BeginOffset == NewBeginOffset);
2547 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2548
2549#ifndef NDEBUG
2550 StringRef OldName = OldPtr->getName();
2551 // Skip through the last '.sroa.' component of the name.
2552 size_t LastSROAPrefix = OldName.rfind(".sroa.");
2553 if (LastSROAPrefix != StringRef::npos) {
2554 OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
2555 // Look for an SROA slice index.
2556 size_t IndexEnd = OldName.find_first_not_of("0123456789");
2557 if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
2558 // Strip the index and look for the offset.
2559 OldName = OldName.substr(IndexEnd + 1);
2560 size_t OffsetEnd = OldName.find_first_not_of("0123456789");
2561 if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
2562 // Strip the offset.
2563 OldName = OldName.substr(OffsetEnd + 1);
2564 }
2565 }
2566 // Strip any SROA suffixes as well.
2567 OldName = OldName.substr(0, OldName.find(".sroa_"));
2568#endif
2569
2570 return getAdjustedPtr(IRB, DL, &NewAI,
2571 APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
2572 PointerTy,
2573#ifndef NDEBUG
2574 Twine(OldName) + "."
2575#else
2576 Twine()
2577#endif
2578 );
2579 }
2580
2581 /// Compute suitable alignment to access this slice of the *new*
2582 /// alloca.
2583 ///
2584 /// You can optionally pass a type to this routine and if that type's ABI
2585 /// alignment is itself suitable, this will return zero.
2586 Align getSliceAlign() {
2587 return commonAlignment(NewAI.getAlign(),
2588 NewBeginOffset - NewAllocaBeginOffset);
2589 }
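Aside, separate from the SROA.cpp listing: the common alignment of the new alloca's alignment and the slice's byte offset is the largest power of two dividing both. A small stand-alone sketch of that computation, assuming power-of-two alignments and a hypothetical helper name:

#include <cassert>
#include <cstdint>

// How a slice's usable alignment degrades with its offset into the alloca.
uint64_t sliceAlign(uint64_t AllocaAlign, uint64_t Offset) {
  if (Offset == 0)
    return AllocaAlign;
  uint64_t OffsetAlign = Offset & -Offset; // largest power of 2 dividing Offset
  return AllocaAlign < OffsetAlign ? AllocaAlign : OffsetAlign;
}

int main() {
  assert(sliceAlign(16, 0) == 16);  // slice at the start keeps full alignment
  assert(sliceAlign(16, 4) == 4);   // offset 4 limits the slice to 4 bytes
  assert(sliceAlign(16, 24) == 8);  // 24 = 8 * 3, so 8-byte alignment survives
  return 0;
}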
2590
2591 unsigned getIndex(uint64_t Offset) {
2592 assert(VecTy && "Can only call getIndex when rewriting a vector");
2593 uint64_t RelOffset = Offset - NewAllocaBeginOffset;
2594 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
2595 uint32_t Index = RelOffset / ElementSize;
2596 assert(Index * ElementSize == RelOffset);
2597 return Index;
2598 }
2599
2600 void deleteIfTriviallyDead(Value *V) {
2601 Instruction *I = cast<Instruction>(V);
2602 if (isInstructionTriviallyDead(I))
2603 Pass.DeadInsts.push_back(I);
2604 }
2605
2606 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
2607 unsigned BeginIndex = getIndex(NewBeginOffset);
2608 unsigned EndIndex = getIndex(NewEndOffset);
2609 assert(EndIndex > BeginIndex && "Empty vector!");
2610
2611 LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2612 NewAI.getAlign(), "load");
2613
2614 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2615 LLVMContext::MD_access_group});
2616 return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
2617 }
2618
2619 Value *rewriteIntegerLoad(LoadInst &LI) {
2620 assert(IntTy && "We cannot insert an integer to the alloca");
2621 assert(!LI.isVolatile());
2622 Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2623 NewAI.getAlign(), "load");
2624 V = convertValue(DL, IRB, V, IntTy);
2625 assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
2626 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2627 if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
2628 IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
2629 V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
2630 }
2631 // It is possible that the extracted type is not the load type. This
2632 // happens if there is a load past the end of the alloca, and as
2633 // a consequence the slice is narrower but still a candidate for integer
2634 // lowering. To handle this case, we just zero extend the extracted
2635 // integer.
2636 assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
2637 "Can only handle an extract for an overly wide load");
2638 if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
2639 V = IRB.CreateZExt(V, LI.getType());
2640 return V;
2641 }
2642
2643 bool visitLoadInst(LoadInst &LI) {
2644 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
2645 Value *OldOp = LI.getOperand(0);
2646 assert(OldOp == OldPtr);
2647
2648 AAMDNodes AATags = LI.getAAMetadata();
2649
2650 unsigned AS = LI.getPointerAddressSpace();
2651
2652 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
2653 : LI.getType();
2654 const bool IsLoadPastEnd =
2655 DL.getTypeStoreSize(TargetTy).getFixedSize() > SliceSize;
2656 bool IsPtrAdjusted = false;
2657 Value *V;
2658 if (VecTy) {
2659 V = rewriteVectorizedLoadInst(LI);
2660 } else if (IntTy && LI.getType()->isIntegerTy()) {
2661 V = rewriteIntegerLoad(LI);
2662 } else if (NewBeginOffset == NewAllocaBeginOffset &&
2663 NewEndOffset == NewAllocaEndOffset &&
2664 (canConvertValue(DL, NewAllocaTy, TargetTy) ||
2665 (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
2666 TargetTy->isIntegerTy()))) {
2667 Value *NewPtr =
2668 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
2669 LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
2670 NewAI.getAlign(), LI.isVolatile(),
2671 LI.getName());
2672 if (AATags)
2673 NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
2674 if (LI.isVolatile())
2675 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
2676 if (NewLI->isAtomic())
2677 NewLI->setAlignment(LI.getAlign());
2678
2679 // Any !nonnull metadata or !range metadata on the old load is also valid
2680 // on the new load. This is even true in some cases even when the loads
2681 // are different types, for example by mapping !nonnull metadata to
2682 // !range metadata by modeling the null pointer constant converted to the
2683 // integer type.
2684 // FIXME: Add support for range metadata here. Currently the utilities
2685 // for this don't propagate range metadata in trivial cases from one
2686 // integer load to another, don't handle non-addrspace-0 null pointers
2687 // correctly, and don't have any support for mapping ranges as the
2688 // integer type becomes wider or narrower.
2689 if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
2690 copyNonnullMetadata(LI, N, *NewLI);
2691
2692 // Try to preserve nonnull metadata
2693 V = NewLI;
2694
2695 // If this is an integer load past the end of the slice (which means the
2696 // bytes outside the slice are undef or this load is dead) just forcibly
2697 // fix the integer size with correct handling of endianness.
2698 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2699 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
2700 if (AITy->getBitWidth() < TITy->getBitWidth()) {
2701 V = IRB.CreateZExt(V, TITy, "load.ext");
2702 if (DL.isBigEndian())
2703 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
2704 "endian_shift");
2705 }
2706 } else {
2707 Type *LTy = TargetTy->getPointerTo(AS);
2708 LoadInst *NewLI =
2709 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
2710 getSliceAlign(), LI.isVolatile(), LI.getName());
2711 if (AATags)
2712 NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
2713 if (LI.isVolatile())
2714 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
2715 NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2716 LLVMContext::MD_access_group});
2717
2718 V = NewLI;
2719 IsPtrAdjusted = true;
2720 }
2721 V = convertValue(DL, IRB, V, TargetTy);
2722
2723 if (IsSplit) {
2724 assert(!LI.isVolatile());
2725 assert(LI.getType()->isIntegerTy() &&
2726 "Only integer type loads and stores are split");
2727 assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedSize() &&
2728 "Split load isn't smaller than original load");
2729 assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
2730 "Non-byte-multiple bit width");
2731 // Move the insertion point just past the load so that we can refer to it.
2732 IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI)));
2733 // Create a placeholder value with the same type as LI to use as the
2734 // basis for the new value. This allows us to replace the uses of LI with
2735 // the computed value, and then replace the placeholder with LI, leaving
2736 // LI only used for this computation.
2737 Value *Placeholder = new LoadInst(
2738 LI.getType(), PoisonValue::get(LI.getType()->getPointerTo(AS)), "",
2739 false, Align(1));
2740 V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
2741 "insert");
2742 LI.replaceAllUsesWith(V);
2743 Placeholder->replaceAllUsesWith(&LI);
2744 Placeholder->deleteValue();
2745 } else {
2746 LI.replaceAllUsesWith(V);
2747 }
2748
2749 Pass.DeadInsts.push_back(&LI);
2750 deleteIfTriviallyDead(OldOp);
2751 LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
2752 return !LI.isVolatile() && !IsPtrAdjusted;
2753 }
2754
2755 bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
2756 AAMDNodes AATags) {
2757 if (V->getType() != VecTy) {
2758 unsigned BeginIndex = getIndex(NewBeginOffset);
2759 unsigned EndIndex = getIndex(NewEndOffset);
2760 assert(EndIndex > BeginIndex && "Empty vector!");
2761 unsigned NumElements = EndIndex - BeginIndex;
2762 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
2763 "Too many elements!");
2764 Type *SliceTy = (NumElements == 1)
2765 ? ElementTy
2766 : FixedVectorType::get(ElementTy, NumElements);
2767 if (V->getType() != SliceTy)
2768 V = convertValue(DL, IRB, V, SliceTy);
2769
2770 // Mix in the existing elements.
2771 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2772 NewAI.getAlign(), "load");
2773 V = insertVector(IRB, Old, V, BeginIndex, "vec");
2774 }
2775 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
2776 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
2777 LLVMContext::MD_access_group});
2778 if (AATags)
2779 Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
2780 Pass.DeadInsts.push_back(&SI);
2781
2782 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
2783 return true;
2784 }
2785
2786 bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
2787 assert(IntTy && "We cannot extract an integer from the alloca");
2788 assert(!SI.isVolatile());
2789 if (DL.getTypeSizeInBits(V->getType()).getFixedSize() !=
2790 IntTy->getBitWidth()) {
2791 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
2792 NewAI.getAlign(), "oldload");
2793 Old = convertValue(DL, IRB, Old, IntTy);
2794 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
2795 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
2796 V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert");
2797 }
2798 V = convertValue(DL, IRB, V, NewAllocaTy);
2799 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
2800 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
2801 LLVMContext::MD_access_group});
2802 if (AATags)
2803 Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
2804 Pass.DeadInsts.push_back(&SI);
2805 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
2806 return true;
2807 }
2808
2809 bool visitStoreInst(StoreInst &SI) {
2810 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
2811 Value *OldOp = SI.getOperand(1);
2812 assert(OldOp == OldPtr);
2813
2814 AAMDNodes AATags = SI.getAAMetadata();
2815 Value *V = SI.getValueOperand();
2816
2817 // Strip all inbounds GEPs and pointer casts to try to dig out any root
2818 // alloca that should be re-examined after promoting this alloca.
2819 if (V->getType()->isPointerTy())
2820 if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
2821 Pass.PostPromotionWorklist.insert(AI);
2822
2823 if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedSize()) {
2824 assert(!SI.isVolatile());
2825 assert(V->getType()->isIntegerTy() &&
2826 "Only integer type loads and stores are split");
2827 assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
2828 "Non-byte-multiple bit width");
2829 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
2830 V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
2831 "extract");
2832 }
2833
2834 if (VecTy)
2835 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
2836 if (IntTy && V->getType()->isIntegerTy())
2837 return rewriteIntegerStore(V, SI, AATags);
2838
2839 const bool IsStorePastEnd =
2840 DL.getTypeStoreSize(V->getType()).getFixedSize() > SliceSize;
2841 StoreInst *NewSI;
2842 if (NewBeginOffset == NewAllocaBeginOffset &&
2843 NewEndOffset == NewAllocaEndOffset &&
2844 (canConvertValue(DL, V->getType(), NewAllocaTy) ||
2845 (IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
2846 V->getType()->isIntegerTy()))) {
2847 // If this is an integer store past the end of slice (and thus the bytes
2848 // past that point are irrelevant or this is unreachable), truncate the
2849 // value prior to storing.
2850 if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
2851 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2852 if (VITy->getBitWidth() > AITy->getBitWidth()) {
2853 if (DL.isBigEndian())
2854 V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
2855 "endian_shift");
2856 V = IRB.CreateTrunc(V, AITy, "load.trunc");
2857 }
2858
2859 V = convertValue(DL, IRB, V, NewAllocaTy);
2860 Value *NewPtr =
2861 getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
2862
2863 NewSI =
2864 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
2865 } else {
2866 unsigned AS = SI.getPointerAddressSpace();
2867 Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo(AS));
2868 NewSI =
2869 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
2870 }
2871 NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
2872 LLVMContext::MD_access_group});
2873 if (AATags)
2874 NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
2875 if (SI.isVolatile())
2876 NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
2877 if (NewSI->isAtomic())
2878 NewSI->setAlignment(SI.getAlign());
2879 Pass.DeadInsts.push_back(&SI);
2880 deleteIfTriviallyDead(OldOp);
2881
2882 LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
2883 return NewSI->getPointerOperand() == &NewAI &&
2884 NewSI->getValueOperand()->getType() == NewAllocaTy &&
2885 !SI.isVolatile();
2886 }
2887
2888 /// Compute an integer value from splatting an i8 across the given
2889 /// number of bytes.
2890 ///
2891 /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
2892 /// call this routine.
2893 /// FIXME: Heed the advice above.
2894 ///
2895 /// \param V The i8 value to splat.
2896 /// \param Size The number of bytes in the output (assuming i8 is one byte)
2897 Value *getIntegerSplat(Value *V, unsigned Size) {
2898 assert(Size > 0 && "Expected a positive number of bytes.");
2899 IntegerType *VTy = cast<IntegerType>(V->getType());
2900 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
2901 if (Size == 1)
2902 return V;
2903
2904 Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
2905 V = IRB.CreateMul(
2906 IRB.CreateZExt(V, SplatIntTy, "zext"),
2907 IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy),
2908 IRB.CreateZExt(Constant::getAllOnesValue(V->getType()),
2909 SplatIntTy)),
2910 "isplat");
2911 return V;
2912 }
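Aside, separate from the SROA.cpp listing: the multiply in getIntegerSplat works because all-ones divided by 0xFF is the 0x0101...01 pattern, so one multiply replicates the byte across the whole width. A stand-alone sketch of the same arithmetic at a fixed 32-bit width (hypothetical helper):

#include <cassert>
#include <cstdint>

// Splat one byte across a 32-bit integer the way getIntegerSplat does:
// zero-extend the byte, then multiply by UINT32_MAX / 0xFF == 0x01010101.
uint32_t splatByte(uint8_t Byte) {
  uint32_t Pattern = UINT32_MAX / 0xFFu; // 0x01010101
  return uint32_t{Byte} * Pattern;
}

int main() {
  assert(splatByte(0xAB) == 0xABABABABu);
  assert(splatByte(0x00) == 0x00000000u);
  return 0;
}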
2913
2914 /// Compute a vector splat for a given element value.
2915 Value *getVectorSplat(Value *V, unsigned NumElements) {
2916 V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
2917 LLVM_DEBUG(dbgs() << " splat: " << *V << "\n");
2918 return V;
2919 }
2920
2921 bool visitMemSetInst(MemSetInst &II) {
2922 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
2923 assert(II.getRawDest() == OldPtr);
2924
2925 AAMDNodes AATags = II.getAAMetadata();
2926
2927 // If the memset has a variable size, it cannot be split, just adjust the
2928 // pointer to the new alloca.
2929 if (!isa<ConstantInt>(II.getLength())) {
2930 assert(!IsSplit);
2931 assert(NewBeginOffset == BeginOffset);
2932 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
2933 II.setDestAlignment(getSliceAlign());
2934
2935 deleteIfTriviallyDead(OldPtr);
2936 return false;
2937 }
2938
2939 // Record this instruction for deletion.
2940 Pass.DeadInsts.push_back(&II);
2941
2942 Type *AllocaTy = NewAI.getAllocatedType();
2943 Type *ScalarTy = AllocaTy->getScalarType();
2944
2945 const bool CanContinue = [&]() {
2946 if (VecTy || IntTy)
2947 return true;
2948 if (BeginOffset > NewAllocaBeginOffset ||
2949 EndOffset < NewAllocaEndOffset)
2950 return false;
2951 // Length must be in range for FixedVectorType.
2952 auto *C = cast<ConstantInt>(II.getLength());
2953 const uint64_t Len = C->getLimitedValue();
2954 if (Len > std::numeric_limits<unsigned>::max())
2955 return false;
2956 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
2957 auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
2958 return canConvertValue(DL, SrcTy, AllocaTy) &&
2959 DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedSize());
2960 }();
2961
2962 // If this doesn't map cleanly onto the alloca type, and that type isn't
2963 // a single value type, just emit a memset.
2964 if (!CanContinue) {
2965 Type *SizeTy = II.getLength()->getType();
2966 Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
2967 CallInst *New = IRB.CreateMemSet(
2968 getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
2969 MaybeAlign(getSliceAlign()), II.isVolatile());
2970 if (AATags)
2971 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
2972 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
2974 }
2975
2976 // If we can represent this as a simple value, we have to build the actual
2977 // value to store, which requires expanding the byte present in memset to
2978 // a sensible representation for the alloca type. This is essentially
2979 // splatting the byte to a sufficiently wide integer, splatting it across
2980 // any desired vector width, and bitcasting to the final type.
2981 Value *V;
2982
2983 if (VecTy) {
2984 // If this is a memset of a vectorized alloca, insert it.
2985 assert(ElementTy == ScalarTy);
2986
2987 unsigned BeginIndex = getIndex(NewBeginOffset);
2988 unsigned EndIndex = getIndex(NewEndOffset);
2989 assert(EndIndex > BeginIndex && "Empty vector!");
2990 unsigned NumElements = EndIndex - BeginIndex;
2991 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
2992 "Too many elements!");
2993
2994 Value *Splat = getIntegerSplat(
2995 II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8);
2996 Splat = convertValue(DL, IRB, Splat, ElementTy);
2997 if (NumElements > 1)
2998 Splat = getVectorSplat(Splat, NumElements);
2999
3000 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3001 NewAI.getAlign(), "oldload");
3002 V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
3003 } else if (IntTy) {
3004 // If this is a memset on an alloca where we can widen stores, insert the
3005 // set integer.
3006 assert(!II.isVolatile());
3007
3008 uint64_t Size = NewEndOffset - NewBeginOffset;
3009 V = getIntegerSplat(II.getValue(), Size);
3010
3011 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3012 EndOffset != NewAllocaBeginOffset)) {
3013 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3014 NewAI.getAlign(), "oldload");
3015 Old = convertValue(DL, IRB, Old, IntTy);
3016 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3017 V = insertInteger(DL, IRB, Old, V, Offset, "insert");
3018 } else {
3019 assert(V->getType() == IntTy &&
3020 "Wrong type for an alloca wide integer!");
3021 }
3022 V = convertValue(DL, IRB, V, AllocaTy);
3023 } else {
3024 // Established these invariants above.
3025 assert(NewBeginOffset == NewAllocaBeginOffset);
3026 assert(NewEndOffset == NewAllocaEndOffset);
3027
3028 V = getIntegerSplat(II.getValue(),
3029 DL.getTypeSizeInBits(ScalarTy).getFixedSize() / 8);
3030 if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
3031 V = getVectorSplat(
3032 V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());
3033
3034 V = convertValue(DL, IRB, V, AllocaTy);
3035 }
3036
3037 Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3038 StoreInst *New =
3039 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
3040 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3041 LLVMContext::MD_access_group});
3042 if (AATags)
3043 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3044 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3045 return !II.isVolatile();
3046 }
3047
3048 bool visitMemTransferInst(MemTransferInst &II) {
3049 // Rewriting of memory transfer instructions can be a bit tricky. We break
3050 // them into two categories: split intrinsics and unsplit intrinsics.
3051
3052 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
1. Assuming 'DebugFlag' is false
2. Loop condition is false. Exiting loop
3053
3054 AAMDNodes AATags = II.getAAMetadata();
3055
3056 bool IsDest = &II.getRawDestUse() == OldUse;
3057 assert((IsDest && II.getRawDest() == OldPtr) ||
3058        (!IsDest && II.getRawSource() == OldPtr));
3. Assuming 'IsDest' is false
4. Assuming pointer value is null
5. '?' condition is true
3059
3060 Align SliceAlign = getSliceAlign();
3061
3062 // For unsplit intrinsics, we simply modify the source and destination
3063 // pointers in place. This isn't just an optimization, it is a matter of
3064 // correctness. With unsplit intrinsics we may be dealing with transfers
3065 // within a single alloca before SROA ran, or with transfers that have
3066 // a variable length. We may also be dealing with memmove instead of
3067 // memcpy, and so simply updating the pointers is all that is necessary for
3068 // us to update both the source and dest of a single call.
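// For instance (a hypothetical case, not from the analyzed trace): a memmove
// with a runtime-variable length whose destination is the old alloca pointer
// stays a memmove; only its destination operand and alignment are retargeted
// to the new slice pointer computed below.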
3069 if (!IsSplittable) {
6. Assuming field 'IsSplittable' is false
7. Taking true branch
3070 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
8. Called C++ object pointer is null
3071 if (IsDest) {
3072 II.setDest(AdjustedPtr);
3073 II.setDestAlignment(SliceAlign);
3074 }
3075 else {
3076 II.setSource(AdjustedPtr);
3077 II.setSourceAlignment(SliceAlign);
3078 }
3079
3080 LLVM_DEBUG(dbgs() << " to: " << II << "\n");
3081 deleteIfTriviallyDead(OldPtr);
3082 return false;
3083 }
3084 // For split transfer intrinsics we have an incredibly useful assurance:
3085 // the source and destination do not reside within the same alloca, and at
3086 // least one of them does not escape. This means that we can replace
3087 // memmove with memcpy, and we don't need to worry about all manner of
3088 // downsides to splitting and transforming the operations.
3089
3090 // If this doesn't map cleanly onto the alloca type, and that type isn't
3091 // a single value type, just emit a memcpy.
3092 bool EmitMemCpy =
3093 !VecTy && !IntTy &&
3094 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3095 SliceSize !=
3096 DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedSize() ||
3097 !NewAI.getAllocatedType()->isSingleValueType());
3098
3099 // If we're just going to emit a memcpy, the alloca hasn't changed, and the
3100 // size hasn't been shrunk based on analysis of the viable range, this is
3101 // a no-op.
3102 if (EmitMemCpy && &OldAI == &NewAI) {
3103 // Ensure the start lines up.
3104 assert(NewBeginOffset == BeginOffset);
3105
3106 // Rewrite the size as needed.
3107 if (NewEndOffset != EndOffset)
3108 II.setLength(ConstantInt::get(II.getLength()->getType(),
3109 NewEndOffset - NewBeginOffset));
3110 return false;
3111 }
3112 // Record this instruction for deletion.
3113 Pass.DeadInsts.push_back(&II);
3114
3115 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3116 // alloca that should be re-examined after rewriting this instruction.
3117 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
3118 if (AllocaInst *AI =
3119 dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
3120 assert(AI != &OldAI && AI != &NewAI &&
3121        "Splittable transfers cannot reach the same alloca on both ends.");
3122 Pass.Worklist.insert(AI);
3123 }
3124
3125 Type *OtherPtrTy = OtherPtr->getType();
3126 unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
3127
3128 // Compute the relative offset for the other pointer within the transfer.
3129 unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
3130 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3131 Align OtherAlign =
3132 (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
3133 OtherAlign =
3134 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3135
3136 if (EmitMemCpy) {
3137 // Compute the other pointer, folding as much as possible to produce
3138 // a single, simple GEP in most cases.
3139 OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3140 OtherPtr->getName() + ".");
3141
3142 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3143 Type *SizeTy = II.getLength()->getType();
3144 Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3145
3146 Value *DestPtr, *SrcPtr;
3147 MaybeAlign DestAlign, SrcAlign;
3148 // Note: IsDest is true iff we're copying into the new alloca slice
3149 if (IsDest) {
3150 DestPtr = OurPtr;
3151 DestAlign = SliceAlign;
3152 SrcPtr = OtherPtr;
3153 SrcAlign = OtherAlign;
3154 } else {
3155 DestPtr = OtherPtr;
3156 DestAlign = OtherAlign;
3157 SrcPtr = OurPtr;
3158 SrcAlign = SliceAlign;
3159 }
3160 CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3161 Size, II.isVolatile());
3162 if (AATags)
3163 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3164 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3165 return false;
3166 }
3167
3168 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3169 NewEndOffset == NewAllocaEndOffset;
3170 uint64_t Size = NewEndOffset - NewBeginOffset;
3171 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3172 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3173 unsigned NumElements = EndIndex - BeginIndex;
3174 IntegerType *SubIntTy =
3175 IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
3176
3177 // Reset the other pointer type to match the register type we're going to
3178 // use, but using the address space of the original other pointer.
3179 Type *OtherTy;
3180 if (VecTy && !IsWholeAlloca) {
3181 if (NumElements == 1)
3182 OtherTy = VecTy->getElementType();
3183 else
3184 OtherTy = FixedVectorType::get(VecTy->getElementType(), NumElements);
3185 } else if (IntTy && !IsWholeAlloca) {
3186 OtherTy = SubIntTy;
3187 } else {
3188 OtherTy = NewAllocaTy;
3189 }
3190 OtherPtrTy = OtherTy->getPointerTo(OtherAS);
3191
3192 Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3193 OtherPtr->getName() + ".");
3194 MaybeAlign SrcAlign = OtherAlign;
3195 MaybeAlign DstAlign = SliceAlign;
3196 if (!IsDest)
3197 std::swap(SrcAlign, DstAlign);
3198
3199 Value *SrcPtr;
3200 Value *DstPtr;
3201
3202 if (IsDest) {
3203 DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3204 SrcPtr = AdjPtr;
3205 } else {
3206 DstPtr = AdjPtr;
3207 SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
3208 }
3209
3210 Value *Src;
3211 if (VecTy && !IsWholeAlloca && !IsDest) {
3212 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3213 NewAI.getAlign(), "load");
3214 Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
3215 } else if (IntTy && !IsWholeAlloca && !IsDest) {
3216 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3217 NewAI.getAlign(), "load");
3218 Src = convertValue(DL, IRB, Src, IntTy);
3219 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3220 Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
3221 } else {
3222 LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3223 II.isVolatile(), "copyload");
3224 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3225 LLVMContext::MD_access_group});
3226 if (AATags)
3227 Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3228 Src = Load;
3229 }
3230
3231 if (VecTy && !IsWholeAlloca && IsDest) {
3232 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3233 NewAI.getAlign(), "oldload");
3234 Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
3235 } else if (IntTy && !IsWholeAlloca && IsDest) {
3236 Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3237 NewAI.getAlign(), "oldload");
3238 Old = convertValue(DL, IRB, Old, IntTy);
3239 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3240 Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
3241 Src = convertValue(DL, IRB, Src, NewAllocaTy);
3242 }
3243
3244 StoreInst *Store = cast<StoreInst>(
3245 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
3246 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3247 LLVMContext::MD_access_group});
3248 if (AATags)
3249 Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3250 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3251 return !II.isVolatile();
3252 }
3253
3254 bool visitIntrinsicInst(IntrinsicInst &II) {
3255 assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
3256        "Unexpected intrinsic!");
3257 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3258
3259 // Record this instruction for deletion.
3260 Pass.DeadInsts.push_back(&II);
3261
3262 if (II.isDroppable()) {
3263 assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
3264 // TODO For now we forget assumed information, this can be improved.
3265 OldPtr->dropDroppableUsesIn(II);
3266 return true;
3267 }
3268
3269 assert(II.getArgOperand(1) == OldPtr);
3270 // Lifetime intrinsics are only promotable if they cover the whole alloca.
3271 // Therefore, we drop lifetime intrinsics which don't cover the whole
3272 // alloca.
3273 // (In theory, intrinsics which partially cover an alloca could be
3274 // promoted, but PromoteMemToReg doesn't handle that case.)
3275 // FIXME: Check whether the alloca is promotable before dropping the
3276 // lifetime intrinsics?
3277 if (NewBeginOffset != NewAllocaBeginOffset ||
3278 NewEndOffset != NewAllocaEndOffset)
3279 return true;
3280
3281 ConstantInt *Size =
3282 ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
3283 NewEndOffset - NewBeginOffset);
3284 // Lifetime intrinsics always expect an i8* so directly get such a pointer
3285 // for the new alloca slice.
3286 Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace());
3287 Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
3288 Value *New;
3289 if (II.getIntrinsicID() == Intrinsic::lifetime_start)
3290 New = IRB.CreateLifetimeStart(Ptr, Size);
3291 else
3292 New = IRB.CreateLifetimeEnd(Ptr, Size);
3293
3294 (void)New;
3295 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3296
3297 return true;
3298 }
3299
3300 void fixLoadStoreAlign(Instruction &Root) {
3301 // This algorithm implements the same visitor loop as
3302 // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
3303 // or store found.
3304 SmallPtrSet<Instruction *, 4> Visited;
3305 SmallVector<Instruction *, 4> Uses;
3306 Visited.insert(&Root);
3307 Uses.push_back(&Root);
3308 do {
3309 Instruction *I = Uses.pop_back_val();
3310
3311 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
3312 LI->setAlignment(std::min(LI->getAlign(), getSliceAlign()));
3313 continue;
3314 }
3315 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
3316 SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
3317 continue;
3318 }
3319
3320 assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
3321        isa<PHINode>(I) || isa<SelectInst>(I) ||
3322        isa<GetElementPtrInst>(I));
3323 for (User *U : I->users())
3324 if (Visited.insert(cast<Instruction>(U)).second)
3325 Uses.push_back(cast<Instruction>(U));
3326 } while (!Uses.empty());
3327 }
3328
3329 bool visitPHINode(PHINode &PN) {
3330 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
3331 assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
3332 assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
3333
3334 // We would like to compute a new pointer in only one place, but have it be
3335 // as local as possible to the PHI. To do that, we re-use the location of
3336 // the old pointer, which necessarily must be in the right position to
3337 // dominate the PHI.
3338 IRBuilderBase::InsertPointGuard Guard(IRB);
3339 if (isa<PHINode>(OldPtr))
3340 IRB.SetInsertPoint(&*OldPtr->getParent()->getFirstInsertionPt());
3341 else
3342 IRB.SetInsertPoint(OldPtr);
3343 IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());
3344
3345 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3346 // Replace the operands which were using the old pointer.
3347 std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
3348
3349 LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
3350 deleteIfTriviallyDead(OldPtr);
3351
3352 // Fix the alignment of any loads or stores using this PHI node.
3353 fixLoadStoreAlign(PN);
3354
3355 // PHIs can't be promoted on their own, but often can be speculated. We
3356 // check the speculation outside of the rewriter so that we see the
3357 // fully-rewritten alloca.
3358 PHIUsers.insert(&PN);
3359 return true;
3360 }
3361
3362 bool visitSelectInst(SelectInst &SI) {
3363 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3364 assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
3365        "Pointer isn't an operand!");
3366 assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
3367 assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
3368
3369 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3370 // Replace the operands which were using the old pointer.
3371 if (SI.getOperand(1) == OldPtr)
3372 SI.setOperand(1, NewPtr);
3373 if (SI.getOperand(2) == OldPtr)
3374 SI.setOperand(2, NewPtr);
3375
3376 LLVM_DEBUG(dbgs() << " to: " << SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("sroa")) { dbgs() << " to: " << SI <<
"\n"; } } while (false)
;
3377 deleteIfTriviallyDead(OldPtr);
3378
3379 // Fix the alignment of any loads or stores using this select.
3380 fixLoadStoreAlign(SI);
3381
3382 // Selects can't be promoted on their own, but often can be speculated. We
3383 // check the speculation outside of the rewriter so that we see the
3384 // fully-rewritten alloca.
3385 SelectUsers.insert(&SI);
3386 return true;
3387 }
3388};
3389
3390namespace {
3391
3392/// Visitor to rewrite aggregate loads and stores as scalar.
3393///
3394/// This pass aggressively rewrites all aggregate loads and stores on
3395/// a particular pointer (or any pointer derived from it which we can identify)
3396/// with scalar loads and stores.
3397class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
3398 // Befriend the base class so it can delegate to private visit methods.
3399 friend class InstVisitor<AggLoadStoreRewriter, bool>;
3400
3401 /// Queue of pointer uses to analyze and potentially rewrite.
3402 SmallVector<Use *, 8> Queue;
3403
3404 /// Set to prevent us from cycling with phi nodes and loops.
3405 SmallPtrSet<User *, 8> Visited;
3406
3407 /// The current pointer use being rewritten. This is used to dig up the used
3408 /// value (as opposed to the user).
3409 Use *U = nullptr;
3410
3411 /// Used to calculate offsets, and hence alignment, of subobjects.
3412 const DataLayout &DL;
3413
3414 IRBuilderTy &IRB;
3415
3416public:
3417 AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
3418 : DL(DL), IRB(IRB) {}
3419
3420 /// Rewrite loads and stores through a pointer and all pointers derived from
3421 /// it.
3422 bool rewrite(Instruction &I) {
3423 LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
3424 enqueueUsers(I);
3425 bool Changed = false;
3426 while (!Queue.empty()) {
3427 U = Queue.pop_back_val();
3428 Changed |= visit(cast<Instruction>(U->getUser()));
3429 }
3430 return Changed;
3431 }
3432
3433private:
3434 /// Enqueue all the users of the given instruction for further processing.
3435 /// This uses a set to de-duplicate users.
3436 void enqueueUsers(Instruction &I) {
3437 for (Use &U : I.uses())
3438 if (Visited.insert(U.getUser()).second)
3439 Queue.push_back(&U);
3440 }
3441
3442 // Conservative default is to not rewrite anything.
3443 bool visitInstruction(Instruction &I) { return false; }
3444
3445 /// Generic recursive split emission class.
3446 template <typename Derived> class OpSplitter {
3447 protected:
3448 /// The builder used to form new instructions.
3449 IRBuilderTy &IRB;
3450
3451 /// The indices which are to be used with insertvalue or extractvalue to
3452 /// select the appropriate value within the aggregate.
3453 SmallVector<unsigned, 4> Indices;
3454
3455 /// The indices to a GEP instruction which will move Ptr to the correct slot
3456 /// within the aggregate.
3457 SmallVector<Value *, 4> GEPIndices;
3458
3459 /// The base pointer of the original op, used as a base for GEPing the
3460 /// split operations.
3461 Value *Ptr;
3462
3463 /// The base pointee type being GEPed into.
3464 Type *BaseTy;
3465
3466 /// Known alignment of the base pointer.
3467 Align BaseAlign;
3468
3469 /// To calculate offset of each component so we can correctly deduce
3470 /// alignments.
3471 const DataLayout &DL;
3472
3473 /// Initialize the splitter with an insertion point, Ptr and start with a
3474 /// single zero GEP index.
3475 OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3476 Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
3477 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
3478 BaseAlign(BaseAlign), DL(DL) {
3479 IRB.SetInsertPoint(InsertionPoint);
3480 }
3481
3482 public:
3483 /// Generic recursive split emission routine.
3484 ///
3485 /// This method recursively splits an aggregate op (load or store) into
3486 /// scalar or vector ops. It splits recursively until it hits a single value
3487 /// and emits that single value operation via the template argument.
3488 ///
3489 /// The logic of this routine relies on GEPs and insertvalue and
3490 /// extractvalue all operating with the same fundamental index list, merely
3491 /// formatted differently (GEPs need actual values).
3492 ///
3493 /// \param Ty The type being split recursively into smaller ops.
3494 /// \param Agg The aggregate value being built up or stored, depending on
3495 /// whether this is splitting a load or a store respectively.
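/// As a hedged illustration (example type chosen here, not from the source):
/// for an aggregate of type { i32, [2 x float] }, the leaf reached with
/// Indices = {1, 0} corresponds to GEPIndices = {i32 0, i32 1, i32 0}: the
/// same path, prefixed with the extra zero index that a GEP over the base
/// pointer requires.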
3496 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
3497 if (Ty->isSingleValueType()) {
3498 unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
3499 return static_cast<Derived *>(this)->emitFunc(
3500 Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
3501 }
3502
3503 if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
3504 unsigned OldSize = Indices.size();
3505 (void)OldSize;
3506 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
3507 ++Idx) {
3508 assert(Indices.size() == OldSize && "Did not return to the old size");
3509 Indices.push_back(Idx);
3510 GEPIndices.push_back(IRB.getInt32(Idx));
3511 emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
3512 GEPIndices.pop_back();
3513 Indices.pop_back();
3514 }
3515 return;
3516 }
3517
3518 if (StructType *STy = dyn_cast<StructType>(Ty)) {
3519 unsigned OldSize = Indices.size();
3520 (void)OldSize;
3521 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
3522 ++Idx) {
3523 assert(Indices.size() == OldSize && "Did not return to the old size");
3524 Indices.push_back(Idx);
3525 GEPIndices.push_back(IRB.getInt32(Idx));
3526 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
3527 GEPIndices.pop_back();
3528 Indices.pop_back();
3529 }
3530 return;
3531 }
3532
3533 llvm_unreachable("Only arrays and structs are aggregate loadable types");
3534 }
3535 };
3536
3537 struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
3538 AAMDNodes AATags;
3539
3540 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3541 AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
3542 IRBuilderTy &IRB)
3543 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,
3544 IRB),
3545 AATags(AATags) {}
3546
3547 /// Emit a leaf load of a single value. This is called at the leaves of the
3548 /// recursive emission to actually load values.
3549 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
3550 assert(Ty->isSingleValueType());
3551 // Load the single value and insert it using the indices.
3552 Value *GEP =
3553 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
3554 LoadInst *Load =
3555 IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
3556
3557 APInt Offset(
3558 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
3559 if (AATags &&
3560 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
3561 Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
3562
3563 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
3564 LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
3565 }
3566 };
3567
3568 bool visitLoadInst(LoadInst &LI) {
3569 assert(LI.getPointerOperand() == *U);
3570 if (!LI.isSimple() || LI.getType()->isSingleValueType())
3571 return false;
3572
3573 // We have an aggregate being loaded, split it apart.
3574 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
3575 LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
3576 getAdjustedAlignment(&LI, 0), DL, IRB);
3577 Value *V = PoisonValue::get(LI.getType());
3578 Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
3579 Visited.erase(&LI);
3580 LI.replaceAllUsesWith(V);
3581 LI.eraseFromParent();
3582 return true;
3583 }
3584
3585 struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
3586 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
3587 AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
3588 IRBuilderTy &IRB)
3589 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
3590 DL, IRB),
3591 AATags(AATags) {}
3592 AAMDNodes AATags;
3593 /// Emit a leaf store of a single value. This is called at the leaves of the
3594 /// recursive emission to actually produce stores.
3595 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
3596 assert(Ty->isSingleValueType());
3597 // Extract the single value and store it using the indices.
3598 //
3599 // The gep and extractvalue values are factored out of the CreateStore
3600 // call to make the output independent of the argument evaluation order.
3601 Value *ExtractValue =
3602 IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
3603 Value *InBoundsGEP =
3604 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
3605 StoreInst *Store =
3606 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
3607
3608 APInt Offset(
3609 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
3610 if (AATags &&
3611 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
3612 Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
3613
3614 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3615 }
3616 };
3617
3618 bool visitStoreInst(StoreInst &SI) {
3619 if (!SI.isSimple() || SI.getPointerOperand() != *U)
3620 return false;
3621 Value *V = SI.getValueOperand();
3622 if (V->getType()->isSingleValueType())
3623 return false;
3624
3625 // We have an aggregate being stored, split it apart.
3626 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3627 StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(),
3628 getAdjustedAlignment(&SI, 0), DL, IRB);
3629 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
3630 Visited.erase(&SI);
3631 SI.eraseFromParent();
3632 return true;
3633 }
3634
3635 bool visitBitCastInst(BitCastInst &BC) {
3636 enqueueUsers(BC);
3637 return false;
3638 }
3639
3640 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
3641 enqueueUsers(ASC);
3642 return false;
3643 }
3644
3645 // Fold gep (select cond, ptr1, ptr2) => select cond, gep(ptr1), gep(ptr2)
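// A minimal sketch of the rewrite with made-up value names (illustrative IR
// only; it assumes the all-constant-index case handled below):
//   %sel = select i1 %c, ptr %a, ptr %b
//   %gep = getelementptr inbounds i32, ptr %sel, i64 1
// becomes, roughly:
//   %a.sroa.gep = getelementptr inbounds i32, ptr %a, i64 1
//   %b.sroa.gep = getelementptr inbounds i32, ptr %b, i64 1
//   %sel.sroa.sel = select i1 %c, ptr %a.sroa.gep, ptr %b.sroa.gep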
3646 bool foldGEPSelect(GetElementPtrInst &GEPI) {
3647 if (!GEPI.hasAllConstantIndices())
3648 return false;
3649
3650 SelectInst *Sel = cast<SelectInst>(GEPI.getPointerOperand());
3651
3652 LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):"
3653                   << "\n original: " << *Sel
3654                   << "\n " << GEPI);
3655
3656 IRB.SetInsertPoint(&GEPI);
3657 SmallVector<Value *, 4> Index(GEPI.indices());
3658 bool IsInBounds = GEPI.isInBounds();
3659
3660 Type *Ty = GEPI.getSourceElementType();
3661 Value *True = Sel->getTrueValue();
3662 Value *NTrue = IRB.CreateGEP(Ty, True, Index, True->getName() + ".sroa.gep",
3663 IsInBounds);
3664
3665 Value *False = Sel->getFalseValue();
3666
3667 Value *NFalse = IRB.CreateGEP(Ty, False, Index,
3668 False->getName() + ".sroa.gep", IsInBounds);
3669
3670 Value *NSel = IRB.CreateSelect(Sel->getCondition(), NTrue, NFalse,
3671 Sel->getName() + ".sroa.sel");
3672 Visited.erase(&GEPI);
3673 GEPI.replaceAllUsesWith(NSel);
3674 GEPI.eraseFromParent();
3675 Instruction *NSelI = cast<Instruction>(NSel);
3676 Visited.insert(NSelI);
3677 enqueueUsers(*NSelI);
3678
3679 LLVM_DEBUG(dbgs() << "\n to: " << *NTrue
3680                   << "\n " << *NFalse
3681                   << "\n " << *NSel << '\n');
3682
3683 return true;
3684 }
3685
3686 // Fold gep (phi ptr1, ptr2) => phi gep(ptr1), gep(ptr2)
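// A similar sketch for the phi case (illustrative IR, hypothetical names):
//   %phi = phi ptr [ %a, %bb1 ], [ %b, %bb2 ]
//   %gep = getelementptr inbounds i32, ptr %phi, i64 1
// becomes, roughly, a GEP emitted right after each incoming definition plus
//   %phi.sroa.phi = phi ptr [ %a.sroa.gep, %bb1 ], [ %b.sroa.gep, %bb2 ]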
3687 bool foldGEPPhi(GetElementPtrInst &GEPI) {
3688 if (!GEPI.hasAllConstantIndices())
3689 return false;
3690
3691 PHINode *PHI = cast<PHINode>(GEPI.getPointerOperand());
3692 if (GEPI.getParent() != PHI->getParent() ||
3693 llvm::any_of(PHI->incoming_values(), [](Value *In)
3694 { Instruction *I = dyn_cast<Instruction>(In);
3695 return !I || isa<GetElementPtrInst>(I) || isa<PHINode>(I) ||
3696 succ_empty(I->getParent()) ||
3697 !I->getParent()->isLegalToHoistInto();
3698 }))
3699 return false;
3700
3701 LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):"
3702                   << "\n original: " << *PHI
3703                   << "\n " << GEPI
3704                   << "\n to: ");
3705
3706 SmallVector<Value *, 4> Index(GEPI.indices());
3707 bool IsInBounds = GEPI.isInBounds();
3708 IRB.SetInsertPoint(GEPI.getParent()->getFirstNonPHI());
3709 PHINode *NewPN = IRB.CreatePHI(GEPI.getType(), PHI->getNumIncomingValues(),
3710 PHI->getName() + ".sroa.phi");
3711 for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
3712 BasicBlock *B = PHI->getIncomingBlock(I);
3713 Value *NewVal = nullptr;
3714 int Idx = NewPN->getBasicBlockIndex(B);
3715 if (Idx >= 0) {
3716 NewVal = NewPN->getIncomingValue(Idx);
3717 } else {
3718 Instruction *In = cast<Instruction>(PHI->getIncomingValue(I));
3719
3720 IRB.SetInsertPoint(In->getParent(), std::next(In->getIterator()));
3721 Type *Ty = GEPI.getSourceElementType();
3722 NewVal = IRB.CreateGEP(Ty, In, Index, In->getName() + ".sroa.gep",
3723 IsInBounds);
3724 }
3725 NewPN->addIncoming(NewVal, B);
3726 }
3727
3728 Visited.erase(&GEPI);
3729 GEPI.replaceAllUsesWith(NewPN);
3730 GEPI.eraseFromParent();
3731 Visited.insert(NewPN);
3732 enqueueUsers(*NewPN);
3733
3734 LLVM_DEBUG(for (Value *In : NewPN->incoming_values())
3735              dbgs() << "\n " << *In;
3736            dbgs() << "\n " << *NewPN << '\n');
3737
3738 return true;
3739 }
3740
3741 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
3742 if (isa<SelectInst>(GEPI.getPointerOperand()) &&
3743 foldGEPSelect(GEPI))
3744 return true;
3745
3746 if (isa<PHINode>(GEPI.getPointerOperand()) &&
3747 foldGEPPhi(GEPI))
3748 return true;
3749
3750 enqueueUsers(GEPI);
3751 return false;
3752 }
3753
3754 bool visitPHINode(PHINode &PN) {
3755 enqueueUsers(PN);
3756 return false;
3757 }
3758
3759 bool visitSelectInst(SelectInst &SI) {
3760 enqueueUsers(SI);
3761 return false;
3762 }
3763};
3764
3765} // end anonymous namespace
3766
3767/// Strip aggregate type wrapping.
3768///
3769/// This removes no-op aggregate types wrapping an underlying type. It will
3770/// strip as many layers of types as it can without changing either the type
3771/// size or the allocated size.
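/// For example (an illustrative case, not taken from this file): a wrapper
/// such as { { float } } strips down to float, since every layer has the
/// same size and alloc size, whereas { i32, i32 } is returned unchanged
/// because peeling off the struct would shrink the allocated size.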
3772static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
3773 if (Ty->isSingleValueType())
3774 return Ty;
3775
3776 uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedSize();
3777 uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedSize();
3778
3779 Type *InnerTy;
3780 if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
3781 InnerTy = ArrTy->getElementType();
3782 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
3783 const StructLayout *SL = DL.getStructLayout(STy);
3784 unsigned Index = SL->getElementContainingOffset(0);
3785 InnerTy = STy->getElementType(Index);
3786 } else {
3787 return Ty;
3788 }
3789
3790 if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedSize() ||
3791 TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedSize())
3792 return Ty;
3793
3794 return stripAggregateTypeWrapping(DL, InnerTy);
3795}
3796
3797/// Try to find a partition of the aggregate type passed in for a given
3798/// offset and size.
3799///
3800/// This recurses through the aggregate type and tries to compute a subtype
3801/// based on the offset and size. When the offset and size span a sub-section
3802/// of an array, it will even compute a new array type for that sub-section,
3803/// and the same for structs.
3804///
3805/// Note that this routine is very strict and tries to find a partition of the
3806/// type which produces the *exact* right offset and size. It is not forgiving
3807/// when the size or offset cause either end of type-based partition to be off.
3808/// Also, this is a best-effort routine. It is reasonable to give up and not
3809/// return a type if necessary.
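/// As a hedged example (types chosen purely for illustration): partitioning
/// { i64, [4 x i32] } at Offset = 8 with Size = 8 selects the [4 x i32]
/// element and recurses to yield the sub-array [2 x i32], while the same
/// query with Size = 6 returns null because 6 is not a multiple of the
/// 4-byte element size.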
3810static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
3811 uint64_t Size) {
3812 if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedSize() == Size)
3813 return stripAggregateTypeWrapping(DL, Ty);
3814 if (Offset > DL.getTypeAllocSize(Ty).getFixedSize() ||
3815 (DL.getTypeAllocSize(Ty).getFixedSize() - Offset) < Size)
3816 return nullptr;
3817
3818 if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
3819 Type *ElementTy;
3820 uint64_t TyNumElements;
3821 if (auto *AT = dyn_cast<ArrayType>(Ty)) {
3822 ElementTy = AT->getElementType();
3823 TyNumElements = AT->getNumElements();
3824 } else {
3825 // FIXME: This isn't right for vectors with non-byte-sized or
3826 // non-power-of-two sized elements.
3827 auto *VT = cast<FixedVectorType>(Ty);
3828 ElementTy = VT->getElementType();
3829 TyNumElements = VT->getNumElements();
3830 }
3831 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedSize();
3832 uint64_t NumSkippedElements = Offset / ElementSize;
3833 if (NumSkippedElements >= TyNumElements)
3834 return nullptr;
3835 Offset -= NumSkippedElements * ElementSize;
3836
3837 // First check if we need to recurse.
3838 if (Offset > 0 || Size < ElementSize) {
3839 // Bail if the partition ends in a different array element.
3840 if ((Offset + Size) > ElementSize)
3841 return nullptr;
3842 // Recurse through the element type trying to peel off offset bytes.
3843 return getTypePartition(DL, ElementTy, Offset, Size);
3844 }
3845 assert(Offset == 0);
3846
3847 if (Size == ElementSize)
3848 return stripAggregateTypeWrapping(DL, ElementTy);
3849 assert(Size > ElementSize);
3850 uint64_t NumElements = Size / ElementSize;
3851 if (NumElements * ElementSize != Size)
3852 return nullptr;
3853 return ArrayType::get(ElementTy, NumElements);
3854 }
3855
3856 StructType *STy = dyn_cast<StructType>(Ty);
3857 if (!STy)
3858 return nullptr;
3859
3860 const StructLayout *SL = DL.getStructLayout(STy);
3861 if (Offset >= SL->getSizeInBytes())
3862 return nullptr;
3863 uint64_t EndOffset = Offset + Size;
3864 if (EndOffset > SL->getSizeInBytes())
3865 return nullptr;
3866
3867 unsigned Index = SL->getElementContainingOffset(Offset);
3868 Offset -= SL->getElementOffset(Index);
3869
3870 Type *ElementTy = STy->getElementType(Index);
3871 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedSize();
3872 if (Offset >= ElementSize)
3873 return nullptr; // The offset points into alignment padding.
3874
3875 // See if any partition must be contained by the element.
3876 if (Offset > 0 || Size < ElementSize) {
3877 if ((Offset + Size) > ElementSize)
3878 return nullptr;
3879 return getTypePartition(DL, ElementTy, Offset, Size);
3880 }
3881 assert(Offset == 0);
3882
3883 if (Size == ElementSize)
3884 return stripAggregateTypeWrapping(DL, ElementTy);
3885
3886 StructType::element_iterator EI = STy->element_begin() + Index,
3887 EE = STy->element_end();
3888 if (EndOffset < SL->getSizeInBytes()) {
3889 unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
3890 if (Index == EndIndex)
3891 return nullptr; // Within a single element and its padding.
3892
3893 // Don't try to form "natural" types if the elements don't line up with the
3894 // expected size.
3895 // FIXME: We could potentially recurse down through the last element in the
3896 // sub-struct to find a natural end point.
3897 if (SL->getElementOffset(EndIndex) != EndOffset)
3898 return nullptr;
3899
3900 assert(Index < EndIndex);
3901 EE = STy->element_begin() + EndIndex;
3902 }
3903
3904 // Try to build up a sub-structure.
3905 StructType *SubTy =
3906 StructType::get(STy->getContext(), makeArrayRef(EI, EE), STy->isPacked());
3907 const StructLayout *SubSL = DL.getStructLayout(SubTy);
3908 if (Size != SubSL->getSizeInBytes())
3909 return nullptr; // The sub-struct doesn't have quite the size needed.
3910
3911 return SubTy;
3912}
3913
3914/// Pre-split loads and stores to simplify rewriting.
3915///
3916/// We want to break up the splittable load+store pairs as much as
3917/// possible. This is important to do as a preprocessing step, as once we
3918/// start rewriting the accesses to partitions of the alloca we lose the
3919/// necessary information to correctly split apart paired loads and stores
3920/// which both point into this alloca. The case to consider is something like
3921/// the following:
3922///
3923/// %a = alloca [12 x i8]
3924/// %gep1 = getelementptr i8, ptr %a, i32 0
3925/// %gep2 = getelementptr i8, ptr %a, i32 4
3926/// %gep3 = getelementptr i8, ptr %a, i32 8
3927/// store float 0.0, ptr %gep1
3928/// store float 1.0, ptr %gep2
3929/// %v = load i64, ptr %gep1
3930/// store i64 %v, ptr %gep2
3931/// %f1 = load float, ptr %gep2
3932/// %f2 = load float, ptr %gep3
3933///
3934/// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
3935/// promote everything so we recover the 2 SSA values that should have been
3936/// there all along.
3937///
3938/// \returns true if any changes are made.
3939bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
3940 LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
3941
3942 // Track the loads and stores which are candidates for pre-splitting here, in
3943 // the order they first appear during the partition scan. These give stable
3944 // iteration order and a basis for tracking which loads and stores we
3945 // actually split.
3946 SmallVector<LoadInst *, 4> Loads;
3947 SmallVector<StoreInst *, 4> Stores;
3948
3949 // We need to accumulate the splits required of each load or store where we
3950 // can find them via a direct lookup. This is important to cross-check loads
3951 // and stores against each other. We also track the slice so that we can kill
3952 // all the slices that end up split.
3953 struct SplitOffsets {
3954 Slice *S;
3955 std::vector<uint64_t> Splits;
3956 };
3957 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
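// For instance (a hypothetical layout, not from this function's input): with
// partitions [0,4) and [4,8), an i64 load slice starting at offset 0 records
// Splits = {4}. Only the interior partition boundary is stored; the end of
// the final partition is implied by the slice's own size.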
3958
3959 // Track loads out of this alloca which cannot, for any reason, be pre-split.
3960 // This is important as we also cannot pre-split stores of those loads!
3961 // FIXME: This is all pretty gross. It means that we can be more aggressive
3962 // in pre-splitting when the load feeding the store happens to come from
3963 // a separate alloca. Put another way, the effectiveness of SROA would be
3964 // decreased by a frontend which just concatenated all of its local allocas
3965 // into one big flat alloca. But defeating such patterns is exactly the job
3966 // SROA is tasked with! Sadly, to not have this discrepancy we would have
3967 // change store pre-splitting to actually force pre-splitting of the load
3968 // that feeds it *and all stores*. That makes pre-splitting much harder, but
3969 // maybe it would make it more principled?
3970 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
3971
3972 LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
3973 for (auto &P : AS.partitions()) {
3974 for (Slice &S : P) {
3975 Instruction *I = cast<Instruction>(S.getUse()->getUser());
3976 if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
3977 // If this is a load we have to track that it can't participate in any
3978 // pre-splitting. If this is a store of a load we have to track that
3979 // that load also can't participate in any pre-splitting.
3980 if (auto *LI = dyn_cast<LoadInst>(I))
3981 UnsplittableLoads.insert(LI);
3982 else if (auto *SI = dyn_cast<StoreInst>(I))
3983 if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
3984 UnsplittableLoads.insert(LI);
3985 continue;
3986 }
3987 assert(P.endOffset() > S.beginOffset() &&
3988        "Empty or backwards partition!");
3989
3990 // Determine if this is a pre-splittable slice.
3991 if (auto *LI = dyn_cast<LoadInst>(I)) {
3992 assert(!LI->isVolatile() && "Cannot split volatile loads!");
3993
3994 // The load must be used exclusively to store into other pointers for
3995 // us to be able to arbitrarily pre-split it. The stores must also be
3996 // simple to avoid changing semantics.
3997 auto IsLoadSimplyStored = [](LoadInst *LI) {
3998 for (User *LU : LI->users()) {
3999 auto *SI = dyn_cast<StoreInst>(LU);
4000 if (!SI || !SI->isSimple())
4001 return false;
4002 }
4003 return true;
4004 };
4005 if (!IsLoadSimplyStored(LI)) {
4006 UnsplittableLoads.insert(LI);
4007 continue;
4008 }
4009
4010 Loads.push_back(LI);
4011 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
4012 if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
4013 // Skip stores *of* pointers. FIXME: This shouldn't even be possible!
4014 continue;
4015 auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
4016 if (!StoredLoad || !StoredLoad->isSimple())
4017 continue;
4018 assert(!SI->isVolatile() && "Cannot split volatile stores!");
4019
4020 Stores.push_back(SI);
4021 } else {
4022 // Other uses cannot be pre-split.
4023 continue;
4024 }
4025
4026 // Record the initial split.
4027 LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n");
4028 auto &Offsets = SplitOffsetsMap[I];
4029 assert(Offsets.Splits.empty() &&
4030        "Should not have splits the first time we see an instruction!");
4031 Offsets.S = &S;
4032 Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
4033 }
4034
4035 // Now scan the already split slices, and add a split for any of them which
4036 // we're going to pre-split.
4037 for (Slice *S : P.splitSliceTails()) {
4038 auto SplitOffsetsMapI =
4039 SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
4040 if (SplitOffsetsMapI == SplitOffsetsMap.end())
4041 continue;
4042 auto &Offsets = SplitOffsetsMapI->second;
4043
4044 assert(Offsets.S == S && "Found a mismatched slice!");
4045 assert(!Offsets.Splits.empty() &&
4046        "Cannot have an empty set of splits on the second partition!");
4047 assert(Offsets.Splits.back() ==
4048            P.beginOffset() - Offsets.S->beginOffset() &&
4049        "Previous split does not end where this one begins!");
4050
4051 // Record each split. The last partition's end isn't needed as the size
4052 // of the slice dictates that.
4053 if (S->endOffset() > P.endOffset())
4054 Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
4055 }
4056 }
4057
4058 // We may have split loads where some of their stores are split stores. For
4059 // such loads and stores, we can only pre-split them if their splits exactly
4060 // match relative to their starting offset. We have to verify this prior to
4061 // any rewriting.
4062 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4063 // Lookup the load we are storing in our map of split
4064 // offsets.
4065 auto *LI = cast<LoadInst>(SI->getValueOperand());
4066 // If it was completely unsplittable, then we're done,
4067 // and this store can't be pre-split.
4068 if (UnsplittableLoads.count(LI))
4069 return true;
4070
4071 auto LoadOffsetsI = SplitOffsetsMap.find(LI);
4072 if (LoadOffsetsI == SplitOffsetsMap.end())
4073 return false; // Unrelated loads are definitely safe.
4074 auto &LoadOffsets = LoadOffsetsI->second;
4075
4076 // Now lookup the store's offsets.
4077 auto &StoreOffsets = SplitOffsetsMap[SI];
4078
4079 // If the relative offsets of each split in the load and
4080 // store match exactly, then we can split them and we
4081 // don't need to remove them here.
4082 if (LoadOffsets.Splits == StoreOffsets.Splits)
4083 return false;
4084
4085 LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
4086 << " " << *LI << "\n"
4087 << " " << *SI << "\n");
4088
4089 // We've found a store and load that we need to split
4090 // with mismatched relative splits. Just give up on them
4091 // and remove both instructions from our list of
4092 // candidates.
4093 UnsplittableLoads.insert(LI);
4094 return true;
4095 });
4096 // Now we have to go *back* through all the stores, because a later store may
4097 // have caused an earlier store's load to become unsplittable and if it is
4098 // unsplittable for the later store, then we can't rely on it being split in
4099 // the earlier store either.
4100 llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
4101 auto *LI = cast<LoadInst>(SI->getValueOperand());
4102 return UnsplittableLoads.count(LI);
4103 });
4104 // Once we've established all the loads that can't be split for some reason,
4105 // filter any that made it into our list out.
4106 llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
4107 return UnsplittableLoads.count(LI);
4108 });
4109
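// [Editorial illustration -- not part of SROA.cpp] The filter above keeps a
// store only when its value-operand load was split at the same byte offsets,
// measured relative to each access's own begin offset. A minimal standalone
// sketch of that bookkeeping, assuming plain std::vector<uint64_t> in place
// of the Splits member and hypothetical helper names:

#include <cstdint>
#include <vector>

// Mirror of "P.endOffset() - S.beginOffset()": convert absolute partition end
// offsets into offsets relative to the access's begin offset.
static std::vector<uint64_t>
relativeSplits(uint64_t AccessBegin, const std::vector<uint64_t> &PartitionEnds) {
  std::vector<uint64_t> Splits;
  for (uint64_t End : PartitionEnds)
    Splits.push_back(End - AccessBegin);
  return Splits;
}

// A load/store pair can be pre-split together only if the relative split
// points coincide exactly; e.g. splits {4, 8} on both sides are fine, while
// {4, 8} against {8} is rejected and the load is marked unsplittable above.
static bool splitsMatch(const std::vector<uint64_t> &LoadSplits,
                        const std::vector<uint64_t> &StoreSplits) {
  return LoadSplits == StoreSplits;
}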
4110 // If no loads or stores are left, there is no pre-splitting to be done for
4111 // this alloca.
4112 if (Loads.empty() && Stores.empty())
4113 return false;
4114
4115 // From here on, we can't fail and will be building new accesses, so rig up
4116 // an IR builder.
4117 IRBuilderTy IRB(&AI);
4118
4119 // Collect the new slices which we will merge into the alloca slices.
4120 SmallVector<Slice, 4> NewSlices;
4121
4122 // Track any allocas we end up splitting loads and stores for so we iterate
4123 // on them.
4124 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4125
4126 // At this point, we have collected all of the loads and stores we can
4127 // pre-split, and the specific splits needed for them. We actually do the
4128 // splitting in a specific order so we can handle the case when one of the
4129 // loads is the value operand to one of the stores.
4130 //
4131 // First, we rewrite all of the split loads, and just accumulate each split
4132 // load in a parallel structure. We also build the slices for them and append
4133 // them to the alloca slices.
4134 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
4135 std::vector<LoadInst *> SplitLoads;
4136 const DataLayout &DL = AI.getModule()->getDataLayout();
4137 for (LoadInst *LI : Loads) {
4138 SplitLoads.clear();
4139
4140 auto &Offsets = SplitOffsetsMap[LI];
4141 unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
4142 assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&(static_cast <bool> (LI->getType()->getIntegerBitWidth
() % 8 == 0 && "Load must have type size equal to store size"
) ? void (0) : __assert_fail ("LI->getType()->getIntegerBitWidth() % 8 == 0 && \"Load must have type size equal to store size\""
, "llvm/lib/Transforms/Scalar/SROA.cpp", 4143, __extension__ __PRETTY_FUNCTION__
))
4143 "Load must have type size equal to store size")(static_cast <bool> (LI->getType()->getIntegerBitWidth
() % 8 == 0 && "Load must have type size equal to store size"
) ? void (0) : __assert_fail ("LI->getType()->getIntegerBitWidth() % 8 == 0 && \"Load must have type size equal to store size\""
, "llvm/lib/Transforms/Scalar/SROA.cpp", 4143, __extension__ __PRETTY_FUNCTION__
))
;
4144 assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&(static_cast <bool> (LI->getType()->getIntegerBitWidth
() / 8 >= SliceSize && "Load must be >= slice size"
) ? void (0) : __assert_fail ("LI->getType()->getIntegerBitWidth() / 8 >= SliceSize && \"Load must be >= slice size\""
, "llvm/lib/Transforms/Scalar/SROA.cpp", 4145, __extension__ __PRETTY_FUNCTION__
))
4145 "Load must be >= slice size")(static_cast <bool> (LI->getType()->getIntegerBitWidth
() / 8 >= SliceSize && "Load must be >= slice size"
) ? void (0) : __assert_fail ("LI->getType()->getIntegerBitWidth() / 8 >= SliceSize && \"Load must be >= slice size\""
, "llvm/lib/Transforms/Scalar/SROA.cpp", 4145, __extension__ __PRETTY_FUNCTION__
))
;
4146
4147 uint64_t BaseOffset = Offsets.S->beginOffset();
4148 assert(BaseOffset + SliceSize > BaseOffset &&
4149 "Cannot represent alloca access size using 64-bit integers!");
4150
4151 Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
4152 IRB.SetInsertPoint(LI);
4153
4154 LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
4155
4156 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4157 int Idx = 0, Size = Offsets.Splits.size();
4158 for (;;) {
4159 auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
4160 auto AS = LI->getPointerAddressSpace();
4161 auto *PartPtrTy = PartTy->getPointerTo(AS);
4162 LoadInst *PLoad = IRB.CreateAlignedLoad(
4163 PartTy,
4164 getAdjustedPtr(IRB, DL, BasePtr,
4165 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4166 PartPtrTy, BasePtr->getName() + "."),
4167 getAdjustedAlignment(LI, PartOffset),
4168 /*IsVolatile*/ false, LI->getName());
4169 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4170 LLVMContext::MD_access_group});
4171
4172 // Append this load onto the list of split loads so we can find it later
4173 // to rewrite the stores.
4174 SplitLoads.push_back(PLoad);
4175
4176 // Now build a new slice for the alloca.
4177 NewSlices.push_back(
4178 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4179 &PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
4180 /*IsSplittable*/ false));
4181 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4182 << ", " << NewSlices.back().endOffset()
4183 << "): " << *PLoad << "\n");
4184
4185 // See if we've handled all the splits.
4186 if (Idx >= Size)
4187 break;
4188
4189 // Setup the next partition.
4190 PartOffset = Offsets.Splits[Idx];
4191 ++Idx;
4192 PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
4193 }
4194
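// [Editorial illustration -- not part of SROA.cpp] The loop above walks the
// interior split points of one slice and emits one partial load per gap. A
// standalone sketch of that enumeration, assuming Splits holds strictly
// increasing offsets in (0, SliceSize):

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Returns (offset, size) of each part relative to the slice begin.
// Splits {4, 8} over a 16-byte slice yield (0,4), (4,4), (8,8); the final
// part always runs to SliceSize, matching
// "(Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset" above.
static std::vector<std::pair<uint64_t, uint64_t>>
enumerateParts(const std::vector<uint64_t> &Splits, uint64_t SliceSize) {
  std::vector<std::pair<uint64_t, uint64_t>> Parts;
  uint64_t PartOffset = 0;
  for (std::size_t Idx = 0; Idx <= Splits.size(); ++Idx) {
    uint64_t PartEnd = Idx < Splits.size() ? Splits[Idx] : SliceSize;
    Parts.emplace_back(PartOffset, PartEnd - PartOffset);
    PartOffset = PartEnd;
  }
  return Parts;
}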
4195 // Now that we have the split loads, do the slow walk over all uses of the
4196 // load and rewrite them as split stores, or save the split loads to use
4197 // below if the store is going to be split there anyway.
4198 bool DeferredStores = false;
4199 for (User *LU : LI->users()) {
4200 StoreInst *SI = cast<StoreInst>(LU);
4201 if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
4202 DeferredStores = true;
4203 LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
4204 << "\n");
4205 continue;
4206 }
4207
4208 Value *StoreBasePtr = SI->getPointerOperand();
4209 IRB.SetInsertPoint(SI);
4210
4211 LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
4212
4213 for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
4214 LoadInst *PLoad = SplitLoads[Idx];
4215 uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
4216 auto *PartPtrTy =
4217 PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
4218
4219 auto AS = SI->getPointerAddressSpace();
4220 StoreInst *PStore = IRB.CreateAlignedStore(
4221 PLoad,
4222 getAdjustedPtr(IRB, DL, StoreBasePtr,
4223 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4224 PartPtrTy, StoreBasePtr->getName() + "."),
4225 getAdjustedAlignment(SI, PartOffset),
4226 /*IsVolatile*/ false);
4227 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4228 LLVMContext::MD_access_group});
4229 LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
4230 }
4231
4232 // We want to immediately iterate on any allocas impacted by splitting
4233 // this store, and we have to track any promotable alloca (indicated by
4234 // a direct store) as needing to be resplit because it is no longer
4235 // promotable.
4236 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
4237 ResplitPromotableAllocas.insert(OtherAI);
4238 Worklist.insert(OtherAI);
4239 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4240 StoreBasePtr->stripInBoundsOffsets())) {
4241 Worklist.insert(OtherAI);
4242 }
4243
4244 // Mark the original store as dead.
4245 DeadInsts.push_back(SI);
4246 }
4247
4248 // Save the split loads if there are deferred stores among the users.
4249 if (DeferredStores)
4250 SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
4251
4252 // Mark the original load as dead and kill the original slice.
4253 DeadInsts.push_back(LI);
4254 Offsets.S->kill();
4255 }
4256
4257 // Second, we rewrite all of the split stores. At this point, we know that
4258 // all loads from this alloca have been split already. For stores of such
4259 // loads, we can simply look up the pre-existing split loads. For stores of
4260 // other loads, we split those loads first and then write split stores of
4261 // them.
4262 for (StoreInst *SI : Stores) {
4263 auto *LI = cast<LoadInst>(SI->getValueOperand());
4264 IntegerType *Ty = cast<IntegerType>(LI->getType());
4265 assert(Ty->getBitWidth() % 8 == 0)(static_cast <bool> (Ty->getBitWidth() % 8 == 0) ? void
(0) : __assert_fail ("Ty->getBitWidth() % 8 == 0", "llvm/lib/Transforms/Scalar/SROA.cpp"
, 4265, __extension__ __PRETTY_FUNCTION__))
;
4266 uint64_t StoreSize = Ty->getBitWidth() / 8;
4267 assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
4268
4269 auto &Offsets = SplitOffsetsMap[SI];
4270 assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
4271 "Slice size should always match load size exactly!");
4272 uint64_t BaseOffset = Offsets.S->beginOffset();
4273 assert(BaseOffset + StoreSize > BaseOffset &&
4274 "Cannot represent alloca access size using 64-bit integers!");
4275
4276 Value *LoadBasePtr = LI->getPointerOperand();
4277 Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
4278
4279 LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
4280
4281 // Check whether we have an already split load.
4282 auto SplitLoadsMapI = SplitLoadsMap.find(LI);
4283 std::vector<LoadInst *> *SplitLoads = nullptr;
4284 if (SplitLoadsMapI != SplitLoadsMap.end()) {
4285 SplitLoads = &SplitLoadsMapI->second;
4286 assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
4287 "Too few split loads for the number of splits in the store!");
4288 } else {
4289 LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n");
4290 }
4291
4292 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4293 int Idx = 0, Size = Offsets.Splits.size();
4294 for (;;) {
4295 auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
4296 auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
4297 auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
4298
4299 // Either lookup a split load or create one.
4300 LoadInst *PLoad;
4301 if (SplitLoads) {
4302 PLoad = (*SplitLoads)[Idx];
4303 } else {
4304 IRB.SetInsertPoint(LI);
4305 auto AS = LI->getPointerAddressSpace();
4306 PLoad = IRB.CreateAlignedLoad(
4307 PartTy,
4308 getAdjustedPtr(IRB, DL, LoadBasePtr,
4309 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4310 LoadPartPtrTy, LoadBasePtr->getName() + "."),
4311 getAdjustedAlignment(LI, PartOffset),
4312 /*IsVolatile*/ false, LI->getName());
4313 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4314 LLVMContext::MD_access_group});
4315 }
4316
4317 // And store this partition.
4318 IRB.SetInsertPoint(SI);
4319 auto AS = SI->getPointerAddressSpace();
4320 StoreInst *PStore = IRB.CreateAlignedStore(
4321 PLoad,
4322 getAdjustedPtr(IRB, DL, StoreBasePtr,
4323 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4324 StorePartPtrTy, StoreBasePtr->getName() + "."),
4325 getAdjustedAlignment(SI, PartOffset),
4326 /*IsVolatile*/ false);
4327 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4328 LLVMContext::MD_access_group});
4329
4330 // Now build a new slice for the alloca.
4331 NewSlices.push_back(
4332 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4333 &PStore->getOperandUse(PStore->getPointerOperandIndex()),
4334 /*IsSplittable*/ false));
4335 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4336 << ", " << NewSlices.back().endOffset()
4337 << "): " << *PStore << "\n");
4338 if (!SplitLoads) {
4339 LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
4340 }
4341
4342 // See if we've finished all the splits.
4343 if (Idx >= Size)
4344 break;
4345
4346 // Setup the next partition.
4347 PartOffset = Offsets.Splits[Idx];
4348 ++Idx;
4349 PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
4350 }
4351
4352 // We want to immediately iterate on any allocas impacted by splitting
4353 // this load, which is only relevant if it isn't a load of this alloca and
4354 // thus we didn't already split the loads above. We also have to keep track
4355 // of any promotable allocas we split loads on as they can no longer be
4356 // promoted.
4357 if (!SplitLoads) {
4358 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
4359 assert(OtherAI != &AI && "We can't re-split our own alloca!");
4360 ResplitPromotableAllocas.insert(OtherAI);
4361 Worklist.insert(OtherAI);
4362 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4363 LoadBasePtr->stripInBoundsOffsets())) {
4364 assert(OtherAI != &AI && "We can't re-split our own alloca!");
4365 Worklist.insert(OtherAI);
4366 }
4367 }
4368
4369 // Mark the original store as dead now that we've split it up and kill its
4370 // slice. Note that we leave the original load in place unless this store
4371 // was its only use. It may in turn be split up if it is an alloca load
4372 // for some other alloca, but it may be a normal load. This may introduce
4373 // redundant loads, but where those can be merged the rest of the optimizer
4374 // should handle the merging, and this uncovers SSA splits which is more
4375 // important. In practice, the original loads will almost always be fully
4376 // split and removed eventually, and the splits will be merged by any
4377 // trivial CSE, including instcombine.
4378 if (LI->hasOneUse()) {
4379 assert(*LI->user_begin() == SI && "Single use isn't this store!");
4380 DeadInsts.push_back(LI);
4381 }
4382 DeadInsts.push_back(SI);
4383 Offsets.S->kill();
4384 }
4385
4386 // Remove the killed slices that have been pre-split.
4387 llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
4388
4389 // Insert our new slices. This will sort and merge them into the sorted
4390 // sequence.
4391 AS.insert(NewSlices);
4392
4393 LLVM_DEBUG(dbgs() << " Pre-split slices:\n");
4394#ifndef NDEBUG
4395 for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
4396 LLVM_DEBUG(AS.print(dbgs(), I, " "));
4397#endif
4398
4399 // Finally, don't try to promote any allocas that now require re-splitting.
4400 // They have already been added to the worklist above.
4401 llvm::erase_if(PromotableAllocas, [&](AllocaInst *AI) {
4402 return ResplitPromotableAllocas.count(AI);
4403 });
4404
4405 return true;
4406}
4407
4408/// Rewrite an alloca partition's users.
4409///
4410/// This routine drives both of the rewriting goals of the SROA pass. It tries
4411/// to rewrite uses of an alloca partition to be conducive for SSA value
4412/// promotion. If the partition needs a new, more refined alloca, this will
4413/// build that new alloca, preserving as much type information as possible, and
4414/// rewrite the uses of the old alloca to point at the new one and have the
4415/// appropriate new offsets. It also evaluates how successful the rewrite was
4416/// at enabling promotion and if it was successful queues the alloca to be
4417/// promoted.
4418AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
4419 Partition &P) {
4420 // Try to compute a friendly type for this partition of the alloca. This
4421 // won't always succeed, in which case we fall back to a legal integer type
4422 // or an i8 array of an appropriate size.
4423 Type *SliceTy = nullptr;
4424 VectorType *SliceVecTy = nullptr;
4425 const DataLayout &DL = AI.getModule()->getDataLayout();
4426 std::pair<Type *, IntegerType *> CommonUseTy =
4427 findCommonType(P.begin(), P.end(), P.endOffset());
4428 // Do all uses operate on the same type?
4429 if (CommonUseTy.first)
4430 if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size()) {
4431 SliceTy = CommonUseTy.first;
4432 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4433 }
4434 // If not, can we find an appropriate subtype in the original allocated type?
4435 if (!SliceTy)
4436 if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
4437 P.beginOffset(), P.size()))
4438 SliceTy = TypePartitionTy;
4439
4440 // If still not, can we use the largest bitwidth integer type used?
4441 if (!SliceTy && CommonUseTy.second)
4442 if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size()) {
4443 SliceTy = CommonUseTy.second;
4444 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4445 }
4446 if ((!SliceTy || (SliceTy->isArrayTy() &&
4447 SliceTy->getArrayElementType()->isIntegerTy())) &&
4448 DL.isLegalInteger(P.size() * 8)) {
4449 SliceTy = Type::getIntNTy(*C, P.size() * 8);
4450 }
4451
4452 // If the common use types are not viable for promotion then attempt to find
4453 // another type that is viable.
4454 if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL))
4455 if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
4456 P.beginOffset(), P.size())) {
4457 VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
4458 if (TypePartitionVecTy &&
4459 checkVectorTypeForPromotion(P, TypePartitionVecTy, DL))
4460 SliceTy = TypePartitionTy;
4461 }
4462
4463 if (!SliceTy)
4464 SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
4465 assert(DL.getTypeAllocSize(SliceTy).getFixedSize() >= P.size());
4466
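// [Editorial illustration -- not part of SROA.cpp] A rough model of the
// fallback order used above to pick SliceTy. It ignores the integer-array
// override and the vector-promotion re-check, and every parameter is a
// hypothetical boolean standing in for the real type query:

#include <string>

static std::string sliceTypeChoice(bool CommonUseTyCoversPartition,
                                   bool HasSubTypeAtPartitionOffset,
                                   bool CommonIntTyCoversPartition,
                                   bool PartitionSizeIsLegalInt) {
  if (CommonUseTyCoversPartition)
    return "common type used by all slices";
  if (HasSubTypeAtPartitionOffset)
    return "subtype of the allocated type at this offset";
  if (CommonIntTyCoversPartition)
    return "widest integer type used by the slices";
  if (PartitionSizeIsLegalInt)
    return "iN with N = 8 * partition size";
  return "[partition size x i8] as the last resort";
}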
4467 bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
4468
4469 VectorType *VecTy =
4470 IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
4471 if (VecTy)
4472 SliceTy = VecTy;
4473
4474 // Check for the case where we're going to rewrite to a new alloca of the
4475 // exact same type as the original, and with the same access offsets. In that
4476 // case, re-use the existing alloca, but still run through the rewriter to
4477 // perform phi and select speculation.
4478 // P.beginOffset() can be non-zero even with the same type in a case with
4479 // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
4480 AllocaInst *NewAI;
4481 if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
4482 NewAI = &AI;
4483 // FIXME: We should be able to bail at this point with "nothing changed".
4484 // FIXME: We might want to defer PHI speculation until after here.
4485 // FIXME: return nullptr;
4486 } else {
4487 // Make sure the alignment is compatible with P.beginOffset().
4488 const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
4489 // If we will get at least this much alignment from the type alone, leave
4490 // the alloca's alignment unconstrained.
4491 const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
4492 NewAI = new AllocaInst(
4493 SliceTy, AI.getAddressSpace(), nullptr,
4494 IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
4495 AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
4496 // Copy the old AI debug location over to the new one.
4497 NewAI->setDebugLoc(AI.getDebugLoc());
4498 ++NumNewAllocas;
4499 }
4500
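// [Editorial illustration -- not part of SROA.cpp] The alignment chosen above
// is the largest power of two provable at "old alloca base + P.beginOffset()".
// A standalone sketch of that arithmetic, assuming OldAlign is a power of two
// (a hypothetical helper, not LLVM's commonAlignment itself):

#include <cstdint>

static uint64_t newPartitionAlign(uint64_t OldAlign, uint64_t ByteOffset) {
  if (ByteOffset == 0)
    return OldAlign;
  uint64_t OffsetAlign = ByteOffset & (~ByteOffset + 1); // lowest set bit
  return OldAlign < OffsetAlign ? OldAlign : OffsetAlign;
}

// E.g. a 16-byte-aligned alloca split at offset 4 only guarantees 4-byte
// alignment for the new partition; when that is no more than the slice type's
// ABI alignment, the new alloca is simply left at its preferred alignment.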
4501 LLVM_DEBUG(dbgs() << "Rewriting alloca partition "
4502 << "[" << P.beginOffset() << "," << P.endOffset()
4503 << ") to: " << *NewAI << "\n");
4504
4505 // Track the high watermark on the worklist as it is only relevant for
4506 // promoted allocas. We will reset it to this point if the alloca is not in
4507 // fact scheduled for promotion.
4508 unsigned PPWOldSize = PostPromotionWorklist.size();
4509 unsigned NumUses = 0;
4510 SmallSetVector<PHINode *, 8> PHIUsers;
4511 SmallSetVector<SelectInst *, 8> SelectUsers;
4512
4513 AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
4514 P.endOffset(), IsIntegerPromotable, VecTy,
4515 PHIUsers, SelectUsers);
4516 bool Promotable = true;
4517 for (Slice *S : P.splitSliceTails()) {
4518 Promotable &= Rewriter.visit(S);
4519 ++NumUses;
4520 }
4521 for (Slice &S : P) {
4522 Promotable &= Rewriter.visit(&S);
4523 ++NumUses;
4524 }
4525
4526 NumAllocaPartitionUses += NumUses;
4527 MaxUsesPerAllocaPartition.updateMax(NumUses);
4528
4529 // Now that we've processed all the slices in the new partition, check if any
4530 // PHIs or Selects would block promotion.
4531 for (PHINode *PHI : PHIUsers)
4532 if (!isSafePHIToSpeculate(*PHI)) {
4533 Promotable = false;
4534 PHIUsers.clear();
4535 SelectUsers.clear();
4536 break;
4537 }
4538
4539 SmallVector<std::pair<SelectInst *, RewriteableMemOps>, 2>
4540 NewSelectsToRewrite;
4541 NewSelectsToRewrite.reserve(SelectUsers.size());
4542 for (SelectInst *Sel : SelectUsers) {
4543 std::optional<RewriteableMemOps> Ops =
4544 isSafeSelectToSpeculate(*Sel, PreserveCFG);
4545 if (!Ops) {
4546 Promotable = false;
4547 PHIUsers.clear();
4548 SelectUsers.clear();
4549 NewSelectsToRewrite.clear();
4550 break;
4551 }
4552 NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
4553 }
4554
4555 if (Promotable) {
4556 for (Use *U : AS.getDeadUsesIfPromotable()) {
4557 auto *OldInst = dyn_cast<Instruction>(U->get());
4558 Value::dropDroppableUse(*U);
4559 if (OldInst)
4560 if (isInstructionTriviallyDead(OldInst))
4561 DeadInsts.push_back(OldInst);
4562 }
4563 if (PHIUsers.empty() && SelectUsers.empty()) {
4564 // Promote the alloca.
4565 PromotableAllocas.push_back(NewAI);
4566 } else {
4567 // If we have either PHIs or Selects to speculate, add them to those
4568 // worklists and re-queue the new alloca so that we promote it on the
4569 // next iteration.
4570 for (PHINode *PHIUser : PHIUsers)
4571 SpeculatablePHIs.insert(PHIUser);
4572 SelectsToRewrite.reserve(SelectsToRewrite.size() +
4573 NewSelectsToRewrite.size());
4574 for (auto &&KV : llvm::make_range(
4575 std::make_move_iterator(NewSelectsToRewrite.begin()),
4576 std::make_move_iterator(NewSelectsToRewrite.end())))
4577 SelectsToRewrite.insert(std::move(KV));
4578 Worklist.insert(NewAI);
4579 }
4580 } else {
4581 // Drop any post-promotion work items if promotion didn't happen.
4582 while (PostPromotionWorklist.size() > PPWOldSize)
4583 PostPromotionWorklist.pop_back();
4584
4585 // We couldn't promote and we didn't create a new partition, nothing
4586 // happened.
4587 if (NewAI == &AI)
4588 return nullptr;
4589
4590 // If we can't promote the alloca, iterate on it to check for new
4591 // refinements exposed by splitting the current alloca. Don't iterate on an
4592 // alloca which didn't actually change and didn't get promoted.
4593 Worklist.insert(NewAI);
4594 }
4595
4596 return NewAI;
4597}
4598
4599/// Walks the slices of an alloca and forms partitions based on them,
4600/// rewriting each of their uses.
4601bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
4602 if (AS.begin() == AS.end())
4603 return false;
4604
4605 unsigned NumPartitions = 0;
4606 bool Changed = false;
4607 const DataLayout &DL = AI.getModule()->getDataLayout();
4608
4609 // First try to pre-split loads and stores.
4610 Changed |= presplitLoadsAndStores(AI, AS);
4611
4612 // Now that we have identified any pre-splitting opportunities,
4613 // mark loads and stores unsplittable except for the following case.
4614 // We leave a slice splittable if all other slices are disjoint or fully
4615 // included in the slice, such as whole-alloca loads and stores.
4616 // If we fail to split these during pre-splitting, we want to force them
4617 // to be rewritten into a partition.
4618 bool IsSorted = true;
4619
4620 uint64_t AllocaSize =
4621 DL.getTypeAllocSize(AI.getAllocatedType()).getFixedSize();
4622 const uint64_t MaxBitVectorSize = 1024;
4623 if (AllocaSize <= MaxBitVectorSize) {
4624 // If a byte boundary is included in any load or store, a slice starting or
4625 // ending at the boundary is not splittable.
4626 SmallBitVector SplittableOffset(AllocaSize + 1, true);
4627 for (Slice &S : AS)
4628 for (unsigned O = S.beginOffset() + 1;
4629 O < S.endOffset() && O < AllocaSize; O++)
4630 SplittableOffset.reset(O);
4631
4632 for (Slice &S : AS) {
4633 if (!S.isSplittable())
4634 continue;
4635
4636 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
4637 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
4638 continue;
4639
4640 if (isa<LoadInst>(S.getUse()->getUser()) ||
4641 isa<StoreInst>(S.getUse()->getUser())) {
4642 S.makeUnsplittable();
4643 IsSorted = false;
4644 }
4645 }
4646 }
4647 else {
4648 // We only allow whole-alloca splittable loads and stores
4649 // for a large alloca to avoid creating too large BitVector.
4650 for (Slice &S : AS) {
4651 if (!S.isSplittable())
4652 continue;
4653
4654 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
4655 continue;
4656
4657 if (isa<LoadInst>(S.getUse()->getUser()) ||
4658 isa<StoreInst>(S.getUse()->getUser())) {
4659 S.makeUnsplittable();
4660 IsSorted = false;
4661 }
4662 }
4663 }
4664
4665 if (!IsSorted)
4666 llvm::sort(AS);
4667
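// [Editorial illustration -- not part of SROA.cpp] A standalone sketch of the
// SplittableOffset bit vector built above: every byte offset strictly inside
// some load or store is a boundary the pass must not split at, so a slice is
// kept splittable only if both of its end points avoid all such interior
// offsets. Slices are modelled here as plain (begin, end) byte ranges:

#include <cstdint>
#include <utility>
#include <vector>

static std::vector<bool>
computeSplittableOffsets(uint64_t AllocaSize,
                         const std::vector<std::pair<uint64_t, uint64_t>> &Slices) {
  std::vector<bool> Splittable(AllocaSize + 1, true);
  for (const auto &Range : Slices)
    for (uint64_t O = Range.first + 1; O < Range.second && O < AllocaSize; ++O)
      Splittable[O] = false; // interior byte of some access
  return Splittable;
}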
4668 /// Describes the allocas introduced by rewritePartition in order to migrate
4669 /// the debug info.
4670 struct Fragment {
4671 AllocaInst *Alloca;
4672 uint64_t Offset;
4673 uint64_t Size;
4674 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
4675 : Alloca(AI), Offset(O), Size(S) {}
4676 };
4677 SmallVector<Fragment, 4> Fragments;
4678
4679 // Rewrite each partition.
4680 for (auto &P : AS.partitions()) {
4681 if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
4682 Changed = true;
4683 if (NewAI != &AI) {
4684 uint64_t SizeOfByte = 8;
4685 uint64_t AllocaSize =
4686 DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedSize();
4687 // Don't include any padding.
4688 uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
4689 Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
4690 }
4691 }
4692 ++NumPartitions;
4693 }
4694
4695 NumAllocaPartitions += NumPartitions;
4696 MaxPartitionsPerAlloca.updateMax(NumPartitions);
4697
4698 // Migrate debug information from the old alloca to the new alloca(s)
4699 // and the individual partitions.
4700 TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
4701 for (DbgVariableIntrinsic *DbgDeclare : DbgDeclares) {
4702 auto *Expr = DbgDeclare->getExpression();
4703 DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
4704 uint64_t AllocaSize =
4705 DL.getTypeSizeInBits(AI.getAllocatedType()).getFixedSize();
4706 for (auto Fragment : Fragments) {
4707 // Create a fragment expression describing the new partition or reuse AI's
4708 // expression if there is only one partition.
4709 auto *FragmentExpr = Expr;
4710 if (Fragment.Size < AllocaSize || Expr->isFragment()) {
4711 // If this alloca is already a scalar replacement of a larger aggregate,
4712 // Fragment.Offset describes the offset inside the scalar.
4713 auto ExprFragment = Expr->getFragmentInfo();
4714 uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0;
4715 uint64_t Start = Offset + Fragment.Offset;
4716 uint64_t Size = Fragment.Size;
4717 if (ExprFragment) {
4718 uint64_t AbsEnd =
4719 ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
4720 if (Start >= AbsEnd)
4721 // No need to describe a SROAed padding.
4722 continue;
4723 Size = std::min(Size, AbsEnd - Start);
4724 }
4725 // The new, smaller fragment is stenciled out from the old fragment.
4726 if (auto OrigFragment = FragmentExpr->getFragmentInfo()) {
4727 assert(Start >= OrigFragment->OffsetInBits &&
4728 "new fragment is outside of original fragment");
4729 Start -= OrigFragment->OffsetInBits;
4730 }
4731
4732 // The alloca may be larger than the variable.
4733 auto VarSize = DbgDeclare->getVariable()->getSizeInBits();
4734 if (VarSize) {
4735 if (Size > *VarSize)
4736 Size = *VarSize;
4737 if (Size == 0 || Start + Size > *VarSize)
4738 continue;
4739 }
4740
4741 // Avoid creating a fragment expression that covers the entire variable.
4742 if (!VarSize || *VarSize != Size) {
4743 if (auto E =
4744 DIExpression::createFragmentExpression(Expr, Start, Size))
4745 FragmentExpr = *E;
4746 else
4747 continue;
4748 }
4749 }
4750
4751 // Remove any existing intrinsics on the new alloca describing
4752 // the variable fragment.
4753 for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca)) {
4754 auto SameVariableFragment = [](const DbgVariableIntrinsic *LHS,
4755 const DbgVariableIntrinsic *RHS) {
4756 return LHS->getVariable() == RHS->getVariable() &&
4757 LHS->getDebugLoc()->getInlinedAt() ==
4758 RHS->getDebugLoc()->getInlinedAt();
4759 };
4760 if (SameVariableFragment(OldDII, DbgDeclare))
4761 OldDII->eraseFromParent();
4762 }
4763
4764 DIB.insertDeclare(Fragment.Alloca, DbgDeclare->getVariable(), FragmentExpr,
4765 DbgDeclare->getDebugLoc(), &AI);
4766 }
4767 }
4768 return Changed;
4769}
4770
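// [Editorial illustration -- not part of SROA.cpp] A simplified sketch of the
// fragment clipping performed while migrating dbg.declare expressions above:
// the new partition's bit range is placed inside the parent fragment (if any)
// and clipped to it, and a partition that lands entirely in padding past the
// parent fragment produces no fragment at all. The variable-size clamp and
// the later re-basing of Start against the parent fragment's offset are
// omitted here.

#include <algorithm>
#include <cstdint>
#include <optional>

struct BitRange {
  uint64_t OffsetInBits;
  uint64_t SizeInBits;
};

static std::optional<BitRange>
stencilFragment(std::optional<BitRange> ParentFragment,
                uint64_t PartitionOffsetBits, uint64_t PartitionSizeBits) {
  uint64_t ParentOffset = ParentFragment ? ParentFragment->OffsetInBits : 0;
  uint64_t Start = ParentOffset + PartitionOffsetBits;
  uint64_t Size = PartitionSizeBits;
  if (ParentFragment) {
    uint64_t AbsEnd = ParentFragment->OffsetInBits + ParentFragment->SizeInBits;
    if (Start >= AbsEnd)
      return std::nullopt; // SROAed padding, nothing to describe
    Size = std::min(Size, AbsEnd - Start);
  }
  return BitRange{Start, Size};
}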
4771/// Clobber a use with poison, deleting the used value if it becomes dead.
4772void SROAPass::clobberUse(Use &U) {
4773 Value *OldV = U;
4774 // Replace the use with a poison value.
4775 U = PoisonValue::get(OldV->getType());
4776
4777 // Check for this making an instruction dead. We have to garbage collect
4778 // all the dead instructions to ensure the uses of any alloca end up being
4779 // minimal.
4780 if (Instruction *OldI = dyn_cast<Instruction>(OldV))
4781 if (isInstructionTriviallyDead(OldI)) {
4782 DeadInsts.push_back(OldI);
4783 }
4784}
4785
4786/// Analyze an alloca for SROA.
4787///
4788/// This analyzes the alloca to ensure we can reason about it, builds
4789/// the slices of the alloca, and then hands it off to be split and
4790/// rewritten as needed.
4791std::pair<bool /*Changed*/, bool /*CFGChanged*/>
4792SROAPass::runOnAlloca(AllocaInst &AI) {
4793 bool Changed = false;
4794 bool CFGChanged = false;
4795
4796 LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
4797 ++NumAllocasAnalyzed;
4798
4799 // Special case dead allocas, as they're trivial.
4800 if (AI.use_empty()) {
4801 AI.eraseFromParent();
4802 Changed = true;
4803 return {Changed, CFGChanged};
4804 }
4805 const DataLayout &DL = AI.getModule()->getDataLayout();
4806
4807 // Skip alloca forms that this analysis can't handle.
4808 auto *AT = AI.getAllocatedType();
4809 if (AI.isArrayAllocation() || !AT->isSized() || isa<ScalableVectorType>(AT) ||
4810 DL.getTypeAllocSize(AT).getFixedSize() == 0)
4811 return {Changed, CFGChanged};
4812
4813 // First, split any FCA (first-class aggregate) loads and stores touching
4814 // this alloca to expose better splitting and promotion opportunities.
4815 IRBuilderTy IRB(&AI);
4816 AggLoadStoreRewriter AggRewriter(DL, IRB);
4817 Changed |= AggRewriter.rewrite(AI);
4818
4819 // Build the slices using a recursive instruction-visiting builder.
4820 AllocaSlices AS(DL, AI);
4821 LLVM_DEBUG(AS.print(dbgs()));
4822 if (AS.isEscaped())
4823 return {Changed, CFGChanged};
4824
4825 // Delete all the dead users of this alloca before splitting and rewriting it.
4826 for (Instruction *DeadUser : AS.getDeadUsers()) {
4827 // Free up everything used by this instruction.
4828 for (Use &DeadOp : DeadUser->operands())
4829 clobberUse(DeadOp);
4830
4831 // Now replace the uses of this instruction.
4832 DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
4833
4834 // And mark it for deletion.
4835 DeadInsts.push_back(DeadUser);
4836 Changed = true;
4837 }
4838 for (Use *DeadOp : AS.getDeadOperands()) {
4839 clobberUse(*DeadOp);
4840 Changed = true;
4841 }
4842
4843 // No slices to split. Leave the dead alloca for a later pass to clean up.
4844 if (AS.begin() == AS.end())
4845 return {Changed, CFGChanged};
4846
4847 Changed |= splitAlloca(AI, AS);
4848
4849 LLVM_DEBUG(dbgs() << " Speculating PHIs\n");
4850 while (!SpeculatablePHIs.empty())
4851 speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());
4852
4853 LLVM_DEBUG(dbgs() << " Rewriting Selects\n");
4854 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
4855 while (!RemainingSelectsToRewrite.empty()) {
4856 const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
4857 CFGChanged |=
4858 rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU);
4859 }
4860
4861 return {Changed, CFGChanged};
4862}
4863
4864/// Delete the dead instructions accumulated in this run.
4865///
4866/// Recursively deletes the dead instructions we've accumulated. This is done
4867/// at the very end to maximize locality of the recursive delete and to
4868/// minimize the problems of invalidated instruction pointers as such pointers
4869/// are used heavily in the intermediate stages of the algorithm.
4870///
4871/// We also record the alloca instructions deleted here so that they aren't
4872/// subsequently handed to mem2reg to promote.
4873bool SROAPass::deleteDeadInstructions(
4874 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
4875 bool Changed = false;
4876 while (!DeadInsts.empty()) {
4877 Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
4878 if (!I)
4879 continue;
4880 LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
4881
4882 // If the instruction is an alloca, find the possible dbg.declare connected
4883 // to it, and remove it too. We must do this before calling RAUW or we will
4884 // not be able to find it.
4885 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
4886 DeletedAllocas.insert(AI);
4887 for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(AI))
4888 OldDII->eraseFromParent();
4889 }
4890
4891 I->replaceAllUsesWith(UndefValue::get(I->getType()));
4892
4893 for (Use &Operand : I->operands())
4894 if (Instruction *U = dyn_cast<Instruction>(Operand)) {
4895 // Zero out the operand and see if it becomes trivially dead.
4896 Operand = nullptr;
4897 if (isInstructionTriviallyDead(U))
4898 DeadInsts.push_back(U);
4899 }
4900
4901 ++NumDeleted;
4902 I->eraseFromParent();
4903 Changed = true;
4904 }
4905 return Changed;
4906}
4907
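// [Editorial illustration -- not part of SROA.cpp] The deletion loop above is
// a worklist-driven cascade: erasing one instruction nulls its operands, and
// any operand that thereby becomes trivially dead is queued in turn. A toy
// standalone model over a hypothetical use-counted graph:

#include <cstddef>
#include <vector>

struct Node {
  std::vector<std::size_t> Operands; // indices of nodes this one uses
  std::size_t UseCount = 0;          // number of live users of this node
  bool Deleted = false;
};

static std::size_t deleteCascade(std::vector<Node> &G,
                                 std::vector<std::size_t> Work) {
  std::size_t NumDeleted = 0;
  while (!Work.empty()) {
    std::size_t I = Work.back();
    Work.pop_back();
    if (G[I].Deleted)
      continue;
    G[I].Deleted = true;
    ++NumDeleted;
    for (std::size_t Op : G[I].Operands)
      if (!G[Op].Deleted && --G[Op].UseCount == 0)
        Work.push_back(Op); // operand just became dead, delete it too
  }
  return NumDeleted;
}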
4908/// Promote the allocas, using the best available technique.
4909///
4910/// This attempts to promote whatever allocas have been identified as viable in
4911/// the PromotableAllocas list. If that list is empty, there is nothing to do.
4912/// This function returns whether any promotion occurred.
4913bool SROAPass::promoteAllocas(Function &F) {
4914 if (PromotableAllocas.empty())
4915 return false;
4916
4917 NumPromoted += PromotableAllocas.size();
4918
4919 LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
4920 PromoteMemToReg(PromotableAllocas, DTU->getDomTree(), AC);
4921 PromotableAllocas.clear();
4922 return true;
4923}
4924
4925PreservedAnalyses SROAPass::runImpl(Function &F, DomTreeUpdater &RunDTU,
4926 AssumptionCache &RunAC) {
4927 LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
4928 C = &F.getContext();
4929 DTU = &RunDTU;
4930 AC = &RunAC;
4931
4932 BasicBlock &EntryBB = F.getEntryBlock();
4933 for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
4934 I != E; ++I) {
4935 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
4936 if (isa<ScalableVectorType>(AI->getAllocatedType())) {
4937 if (isAllocaPromotable(AI))
4938 PromotableAllocas.push_back(AI);
4939 } else {
4940 Worklist.insert(AI);
4941 }
4942 }
4943 }
4944
4945 bool Changed = false;
4946 bool CFGChanged = false;
4947 // A set of deleted alloca instruction pointers which should be removed from
4948 // the list of promotable allocas.
4949 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
4950
4951 do {
4952 while (!Worklist.empty()) {
4953 auto [IterationChanged, IterationCFGChanged] =
4954 runOnAlloca(*Worklist.pop_back_val());
4955 Changed |= IterationChanged;
4956 CFGChanged |= IterationCFGChanged;
4957
4958 Changed |= deleteDeadInstructions(DeletedAllocas);
4959
4960 // Remove the deleted allocas from various lists so that we don't try to
4961 // continue processing them.
4962 if (!DeletedAllocas.empty()) {
4963 auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
4964 Worklist.remove_if(IsInSet);
4965 PostPromotionWorklist.remove_if(IsInSet);
4966 llvm::erase_if(PromotableAllocas, IsInSet);
4967 DeletedAllocas.clear();
4968 }
4969 }
4970
4971 Changed |= promoteAllocas(F);
4972
4973 Worklist = PostPromotionWorklist;
4974 PostPromotionWorklist.clear();
4975 } while (!Worklist.empty());
4976
4977 assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
4978 assert((!CFGChanged || !PreserveCFG) &&
4979 "Should not have modified the CFG when told to preserve it.");
4980
4981 if (!Changed)
4982 return PreservedAnalyses::all();
4983
4984 PreservedAnalyses PA;
4985 if (!CFGChanged)
4986 PA.preserveSet<CFGAnalyses>();
4987 PA.preserve<DominatorTreeAnalysis>();
4988 return PA;
4989}
4990
4991PreservedAnalyses SROAPass::runImpl(Function &F, DominatorTree &RunDT,
4992 AssumptionCache &RunAC) {
4993 DomTreeUpdater DTU(RunDT, DomTreeUpdater::UpdateStrategy::Lazy);
4994 return runImpl(F, DTU, RunAC);
4995}
4996
4997PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
4998 return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F),
4999 AM.getResult<AssumptionAnalysis>(F));
5000}
5001
5002void SROAPass::printPipeline(
5003 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
5004 static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
5005 OS, MapClassName2PassName);
5006 OS << (PreserveCFG ? "<preserve-cfg>" : "<modify-cfg>");
5007}
5008
5009SROAPass::SROAPass(SROAOptions PreserveCFG_)
5010 : PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}
5011
5012/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
5013///
5014/// This is in the llvm namespace purely to allow it to be a friend of the \c
5015/// SROA pass.
5016class llvm::sroa::SROALegacyPass : public FunctionPass {
5017 /// The SROA implementation.
5018 SROAPass Impl;
5019
5020public:
5021 static char ID;
5022
5023 SROALegacyPass(SROAOptions PreserveCFG = SROAOptions::PreserveCFG)
5024 : FunctionPass(ID), Impl(PreserveCFG) {
5025 initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
5026 }
5027
5028 bool runOnFunction(Function &F) override {
5029 if (skipFunction(F))
5030 return false;
5031
5032 auto PA = Impl.runImpl(
5033 F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
5034 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
5035 return !PA.areAllPreserved();
5036 }
5037
5038 void getAnalysisUsage(AnalysisUsage &AU) const override {
5039 AU.addRequired<AssumptionCacheTracker>();
5040 AU.addRequired<DominatorTreeWrapperPass>();
5041 AU.addPreserved<GlobalsAAWrapperPass>();
5042 AU.addPreserved<DominatorTreeWrapperPass>();
5043 }
5044
5045 StringRef getPassName() const override { return "SROA"; }
5046};
5047
5048char SROALegacyPass::ID = 0;
5049
5050FunctionPass *llvm::createSROAPass(bool PreserveCFG) {
5051 return new SROALegacyPass(PreserveCFG ? SROAOptions::PreserveCFG
5052 : SROAOptions::ModifyCFG);
5053}
5054
5055 INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
5056 "Scalar Replacement Of Aggregates", false, false)
5057 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
5058 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
5059 INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
5060 false, false)