LLVM API Documentation
00001 //===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 /// \file 00010 /// This transformation implements the well known scalar replacement of 00011 /// aggregates transformation. It tries to identify promotable elements of an 00012 /// aggregate alloca, and promote them to registers. It will also try to 00013 /// convert uses of an element (or set of elements) of an alloca into a vector 00014 /// or bitfield-style integer scalar if appropriate. 00015 /// 00016 /// It works to do this with minimal slicing of the alloca so that regions 00017 /// which are merely transferred in and out of external memory remain unchanged 00018 /// and are not decomposed to scalar code. 00019 /// 00020 /// Because this also performs alloca promotion, it can be thought of as also 00021 /// serving the purpose of SSA formation. The algorithm iterates on the 00022 /// function until all opportunities for promotion have been realized. 00023 /// 00024 //===----------------------------------------------------------------------===// 00025 00026 #define DEBUG_TYPE "sroa" 00027 #include "llvm/Transforms/Scalar.h" 00028 #include "llvm/ADT/STLExtras.h" 00029 #include "llvm/ADT/SetVector.h" 00030 #include "llvm/ADT/SmallVector.h" 00031 #include "llvm/ADT/Statistic.h" 00032 #include "llvm/Analysis/Dominators.h" 00033 #include "llvm/Analysis/Loads.h" 00034 #include "llvm/Analysis/PtrUseVisitor.h" 00035 #include "llvm/Analysis/ValueTracking.h" 00036 #include "llvm/DIBuilder.h" 00037 #include "llvm/DebugInfo.h" 00038 #include "llvm/IR/Constants.h" 00039 #include "llvm/IR/DataLayout.h" 00040 #include "llvm/IR/DerivedTypes.h" 00041 #include "llvm/IR/Function.h" 00042 #include "llvm/IR/IRBuilder.h" 00043 #include "llvm/IR/Instructions.h" 00044 #include "llvm/IR/IntrinsicInst.h" 00045 #include "llvm/IR/LLVMContext.h" 00046 #include "llvm/IR/Operator.h" 00047 #include "llvm/InstVisitor.h" 00048 #include "llvm/Pass.h" 00049 #include "llvm/Support/CommandLine.h" 00050 #include "llvm/Support/Debug.h" 00051 #include "llvm/Support/ErrorHandling.h" 00052 #include "llvm/Support/MathExtras.h" 00053 #include "llvm/Support/raw_ostream.h" 00054 #include "llvm/Transforms/Utils/Local.h" 00055 #include "llvm/Transforms/Utils/PromoteMemToReg.h" 00056 #include "llvm/Transforms/Utils/SSAUpdater.h" 00057 using namespace llvm; 00058 00059 STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement"); 00060 STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed"); 00061 STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions"); 00062 STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found"); 00063 STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses"); 00064 STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced"); 00065 STATISTIC(NumPromoted, "Number of allocas promoted to SSA values"); 00066 STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion"); 00067 STATISTIC(NumDeleted, "Number of instructions deleted"); 00068 STATISTIC(NumVectorized, "Number of vectorized aggregates"); 00069 00070 /// Hidden option to force the pass to not use DomTree and mem2reg, instead 00071 /// forming SSA values through the SSAUpdater infrastructure. 00072 static cl::opt<bool> 00073 ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden); 00074 00075 namespace { 00076 /// \brief A custom IRBuilder inserter which prefixes all names if they are 00077 /// preserved. 00078 template <bool preserveNames = true> 00079 class IRBuilderPrefixedInserter : 00080 public IRBuilderDefaultInserter<preserveNames> { 00081 std::string Prefix; 00082 00083 public: 00084 void SetNamePrefix(const Twine &P) { Prefix = P.str(); } 00085 00086 protected: 00087 void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, 00088 BasicBlock::iterator InsertPt) const { 00089 IRBuilderDefaultInserter<preserveNames>::InsertHelper( 00090 I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt); 00091 } 00092 }; 00093 00094 // Specialization for not preserving the name is trivial. 00095 template <> 00096 class IRBuilderPrefixedInserter<false> : 00097 public IRBuilderDefaultInserter<false> { 00098 public: 00099 void SetNamePrefix(const Twine &P) {} 00100 }; 00101 00102 /// \brief Provide a typedef for IRBuilder that drops names in release builds. 00103 #ifndef NDEBUG 00104 typedef llvm::IRBuilder<true, ConstantFolder, 00105 IRBuilderPrefixedInserter<true> > IRBuilderTy; 00106 #else 00107 typedef llvm::IRBuilder<false, ConstantFolder, 00108 IRBuilderPrefixedInserter<false> > IRBuilderTy; 00109 #endif 00110 } 00111 00112 namespace { 00113 /// \brief A common base class for representing a half-open byte range. 00114 struct ByteRange { 00115 /// \brief The beginning offset of the range. 00116 uint64_t BeginOffset; 00117 00118 /// \brief The ending offset, not included in the range. 00119 uint64_t EndOffset; 00120 00121 ByteRange() : BeginOffset(), EndOffset() {} 00122 ByteRange(uint64_t BeginOffset, uint64_t EndOffset) 00123 : BeginOffset(BeginOffset), EndOffset(EndOffset) {} 00124 00125 /// \brief Support for ordering ranges. 00126 /// 00127 /// This provides an ordering over ranges such that start offsets are 00128 /// always increasing, and within equal start offsets, the end offsets are 00129 /// decreasing. Thus the spanning range comes first in a cluster with the 00130 /// same start position. 00131 bool operator<(const ByteRange &RHS) const { 00132 if (BeginOffset < RHS.BeginOffset) return true; 00133 if (BeginOffset > RHS.BeginOffset) return false; 00134 if (EndOffset > RHS.EndOffset) return true; 00135 return false; 00136 } 00137 00138 /// \brief Support comparison with a single offset to allow binary searches. 00139 friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { 00140 return LHS.BeginOffset < RHSOffset; 00141 } 00142 00143 friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, 00144 const ByteRange &RHS) { 00145 return LHSOffset < RHS.BeginOffset; 00146 } 00147 00148 bool operator==(const ByteRange &RHS) const { 00149 return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; 00150 } 00151 bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } 00152 }; 00153 00154 /// \brief A partition of an alloca. 00155 /// 00156 /// This structure represents a contiguous partition of the alloca. These are 00157 /// formed by examining the uses of the alloca. During formation, they may 00158 /// overlap but once an AllocaPartitioning is built, the Partitions within it 00159 /// are all disjoint. 00160 struct Partition : public ByteRange { 00161 /// \brief Whether this partition is splittable into smaller partitions. 00162 /// 00163 /// We flag partitions as splittable when they are formed entirely due to 00164 /// accesses by trivially splittable operations such as memset and memcpy. 00165 bool IsSplittable; 00166 00167 /// \brief Test whether a partition has been marked as dead. 00168 bool isDead() const { 00169 if (BeginOffset == UINT64_MAX) { 00170 assert(EndOffset == UINT64_MAX); 00171 return true; 00172 } 00173 return false; 00174 } 00175 00176 /// \brief Kill a partition. 00177 /// This is accomplished by setting both its beginning and end offset to 00178 /// the maximum possible value. 00179 void kill() { 00180 assert(!isDead() && "He's Dead, Jim!"); 00181 BeginOffset = EndOffset = UINT64_MAX; 00182 } 00183 00184 Partition() : ByteRange(), IsSplittable() {} 00185 Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) 00186 : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} 00187 }; 00188 00189 /// \brief A particular use of a partition of the alloca. 00190 /// 00191 /// This structure is used to associate uses of a partition with it. They 00192 /// mark the range of bytes which are referenced by a particular instruction, 00193 /// and includes a handle to the user itself and the pointer value in use. 00194 /// The bounds of these uses are determined by intersecting the bounds of the 00195 /// memory use itself with a particular partition. As a consequence there is 00196 /// intentionally overlap between various uses of the same partition. 00197 class PartitionUse : public ByteRange { 00198 /// \brief Combined storage for both the Use* and split state. 00199 PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit; 00200 00201 public: 00202 PartitionUse() : ByteRange(), UsePtrAndIsSplit() {} 00203 PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U, 00204 bool IsSplit) 00205 : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {} 00206 00207 /// \brief The use in question. Provides access to both user and used value. 00208 /// 00209 /// Note that this may be null if the partition use is *dead*, that is, it 00210 /// should be ignored. 00211 Use *getUse() const { return UsePtrAndIsSplit.getPointer(); } 00212 00213 /// \brief Set the use for this partition use range. 00214 void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); } 00215 00216 /// \brief Whether this use is split across multiple partitions. 00217 bool isSplit() const { return UsePtrAndIsSplit.getInt(); } 00218 }; 00219 } 00220 00221 namespace llvm { 00222 template <> struct isPodLike<Partition> : llvm::true_type {}; 00223 template <> struct isPodLike<PartitionUse> : llvm::true_type {}; 00224 } 00225 00226 namespace { 00227 /// \brief Alloca partitioning representation. 00228 /// 00229 /// This class represents a partitioning of an alloca into slices, and 00230 /// information about the nature of uses of each slice of the alloca. The goal 00231 /// is that this information is sufficient to decide if and how to split the 00232 /// alloca apart and replace slices with scalars. It is also intended that this 00233 /// structure can capture the relevant information needed both to decide about 00234 /// and to enact these transformations. 00235 class AllocaPartitioning { 00236 public: 00237 /// \brief Construct a partitioning of a particular alloca. 00238 /// 00239 /// Construction does most of the work for partitioning the alloca. This 00240 /// performs the necessary walks of users and builds a partitioning from it. 00241 AllocaPartitioning(const DataLayout &TD, AllocaInst &AI); 00242 00243 /// \brief Test whether a pointer to the allocation escapes our analysis. 00244 /// 00245 /// If this is true, the partitioning is never fully built and should be 00246 /// ignored. 00247 bool isEscaped() const { return PointerEscapingInstr; } 00248 00249 /// \brief Support for iterating over the partitions. 00250 /// @{ 00251 typedef SmallVectorImpl<Partition>::iterator iterator; 00252 iterator begin() { return Partitions.begin(); } 00253 iterator end() { return Partitions.end(); } 00254 00255 typedef SmallVectorImpl<Partition>::const_iterator const_iterator; 00256 const_iterator begin() const { return Partitions.begin(); } 00257 const_iterator end() const { return Partitions.end(); } 00258 /// @} 00259 00260 /// \brief Support for iterating over and manipulating a particular 00261 /// partition's uses. 00262 /// 00263 /// The iteration support provided for uses is more limited, but also 00264 /// includes some manipulation routines to support rewriting the uses of 00265 /// partitions during SROA. 00266 /// @{ 00267 typedef SmallVectorImpl<PartitionUse>::iterator use_iterator; 00268 use_iterator use_begin(unsigned Idx) { return Uses[Idx].begin(); } 00269 use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); } 00270 use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); } 00271 use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); } 00272 00273 typedef SmallVectorImpl<PartitionUse>::const_iterator const_use_iterator; 00274 const_use_iterator use_begin(unsigned Idx) const { return Uses[Idx].begin(); } 00275 const_use_iterator use_begin(const_iterator I) const { 00276 return Uses[I - begin()].begin(); 00277 } 00278 const_use_iterator use_end(unsigned Idx) const { return Uses[Idx].end(); } 00279 const_use_iterator use_end(const_iterator I) const { 00280 return Uses[I - begin()].end(); 00281 } 00282 00283 unsigned use_size(unsigned Idx) const { return Uses[Idx].size(); } 00284 unsigned use_size(const_iterator I) const { return Uses[I - begin()].size(); } 00285 const PartitionUse &getUse(unsigned PIdx, unsigned UIdx) const { 00286 return Uses[PIdx][UIdx]; 00287 } 00288 const PartitionUse &getUse(const_iterator I, unsigned UIdx) const { 00289 return Uses[I - begin()][UIdx]; 00290 } 00291 00292 void use_push_back(unsigned Idx, const PartitionUse &PU) { 00293 Uses[Idx].push_back(PU); 00294 } 00295 void use_push_back(const_iterator I, const PartitionUse &PU) { 00296 Uses[I - begin()].push_back(PU); 00297 } 00298 /// @} 00299 00300 /// \brief Allow iterating the dead users for this alloca. 00301 /// 00302 /// These are instructions which will never actually use the alloca as they 00303 /// are outside the allocated range. They are safe to replace with undef and 00304 /// delete. 00305 /// @{ 00306 typedef SmallVectorImpl<Instruction *>::const_iterator dead_user_iterator; 00307 dead_user_iterator dead_user_begin() const { return DeadUsers.begin(); } 00308 dead_user_iterator dead_user_end() const { return DeadUsers.end(); } 00309 /// @} 00310 00311 /// \brief Allow iterating the dead expressions referring to this alloca. 00312 /// 00313 /// These are operands which have cannot actually be used to refer to the 00314 /// alloca as they are outside its range and the user doesn't correct for 00315 /// that. These mostly consist of PHI node inputs and the like which we just 00316 /// need to replace with undef. 00317 /// @{ 00318 typedef SmallVectorImpl<Use *>::const_iterator dead_op_iterator; 00319 dead_op_iterator dead_op_begin() const { return DeadOperands.begin(); } 00320 dead_op_iterator dead_op_end() const { return DeadOperands.end(); } 00321 /// @} 00322 00323 /// \brief MemTransferInst auxiliary data. 00324 /// This struct provides some auxiliary data about memory transfer 00325 /// intrinsics such as memcpy and memmove. These intrinsics can use two 00326 /// different ranges within the same alloca, and provide other challenges to 00327 /// correctly represent. We stash extra data to help us untangle this 00328 /// after the partitioning is complete. 00329 struct MemTransferOffsets { 00330 /// The destination begin and end offsets when the destination is within 00331 /// this alloca. If the end offset is zero the destination is not within 00332 /// this alloca. 00333 uint64_t DestBegin, DestEnd; 00334 00335 /// The source begin and end offsets when the source is within this alloca. 00336 /// If the end offset is zero, the source is not within this alloca. 00337 uint64_t SourceBegin, SourceEnd; 00338 00339 /// Flag for whether an alloca is splittable. 00340 bool IsSplittable; 00341 }; 00342 MemTransferOffsets getMemTransferOffsets(MemTransferInst &II) const { 00343 return MemTransferInstData.lookup(&II); 00344 } 00345 00346 /// \brief Map from a PHI or select operand back to a partition. 00347 /// 00348 /// When manipulating PHI nodes or selects, they can use more than one 00349 /// partition of an alloca. We store a special mapping to allow finding the 00350 /// partition referenced by each of these operands, if any. 00351 iterator findPartitionForPHIOrSelectOperand(Use *U) { 00352 SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt 00353 = PHIOrSelectOpMap.find(U); 00354 if (MapIt == PHIOrSelectOpMap.end()) 00355 return end(); 00356 00357 return begin() + MapIt->second.first; 00358 } 00359 00360 /// \brief Map from a PHI or select operand back to the specific use of 00361 /// a partition. 00362 /// 00363 /// Similar to mapping these operands back to the partitions, this maps 00364 /// directly to the use structure of that partition. 00365 use_iterator findPartitionUseForPHIOrSelectOperand(Use *U) { 00366 SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt 00367 = PHIOrSelectOpMap.find(U); 00368 assert(MapIt != PHIOrSelectOpMap.end()); 00369 return Uses[MapIt->second.first].begin() + MapIt->second.second; 00370 } 00371 00372 /// \brief Compute a common type among the uses of a particular partition. 00373 /// 00374 /// This routines walks all of the uses of a particular partition and tries 00375 /// to find a common type between them. Untyped operations such as memset and 00376 /// memcpy are ignored. 00377 Type *getCommonType(iterator I) const; 00378 00379 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 00380 void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const; 00381 void printUsers(raw_ostream &OS, const_iterator I, 00382 StringRef Indent = " ") const; 00383 void print(raw_ostream &OS) const; 00384 void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump(const_iterator I) const; 00385 void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump() const; 00386 #endif 00387 00388 private: 00389 template <typename DerivedT, typename RetT = void> class BuilderBase; 00390 class PartitionBuilder; 00391 friend class AllocaPartitioning::PartitionBuilder; 00392 class UseBuilder; 00393 friend class AllocaPartitioning::UseBuilder; 00394 00395 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 00396 /// \brief Handle to alloca instruction to simplify method interfaces. 00397 AllocaInst &AI; 00398 #endif 00399 00400 /// \brief The instruction responsible for this alloca having no partitioning. 00401 /// 00402 /// When an instruction (potentially) escapes the pointer to the alloca, we 00403 /// store a pointer to that here and abort trying to partition the alloca. 00404 /// This will be null if the alloca is partitioned successfully. 00405 Instruction *PointerEscapingInstr; 00406 00407 /// \brief The partitions of the alloca. 00408 /// 00409 /// We store a vector of the partitions over the alloca here. This vector is 00410 /// sorted by increasing begin offset, and then by decreasing end offset. See 00411 /// the Partition inner class for more details. Initially (during 00412 /// construction) there are overlaps, but we form a disjoint sequence of 00413 /// partitions while finishing construction and a fully constructed object is 00414 /// expected to always have this as a disjoint space. 00415 SmallVector<Partition, 8> Partitions; 00416 00417 /// \brief The uses of the partitions. 00418 /// 00419 /// This is essentially a mapping from each partition to a list of uses of 00420 /// that partition. The mapping is done with a Uses vector that has the exact 00421 /// same number of entries as the partition vector. Each entry is itself 00422 /// a vector of the uses. 00423 SmallVector<SmallVector<PartitionUse, 2>, 8> Uses; 00424 00425 /// \brief Instructions which will become dead if we rewrite the alloca. 00426 /// 00427 /// Note that these are not separated by partition. This is because we expect 00428 /// a partitioned alloca to be completely rewritten or not rewritten at all. 00429 /// If rewritten, all these instructions can simply be removed and replaced 00430 /// with undef as they come from outside of the allocated space. 00431 SmallVector<Instruction *, 8> DeadUsers; 00432 00433 /// \brief Operands which will become dead if we rewrite the alloca. 00434 /// 00435 /// These are operands that in their particular use can be replaced with 00436 /// undef when we rewrite the alloca. These show up in out-of-bounds inputs 00437 /// to PHI nodes and the like. They aren't entirely dead (there might be 00438 /// a GEP back into the bounds using it elsewhere) and nor is the PHI, but we 00439 /// want to swap this particular input for undef to simplify the use lists of 00440 /// the alloca. 00441 SmallVector<Use *, 8> DeadOperands; 00442 00443 /// \brief The underlying storage for auxiliary memcpy and memset info. 00444 SmallDenseMap<MemTransferInst *, MemTransferOffsets, 4> MemTransferInstData; 00445 00446 /// \brief A side datastructure used when building up the partitions and uses. 00447 /// 00448 /// This mapping is only really used during the initial building of the 00449 /// partitioning so that we can retain information about PHI and select nodes 00450 /// processed. 00451 SmallDenseMap<Instruction *, std::pair<uint64_t, bool> > PHIOrSelectSizes; 00452 00453 /// \brief Auxiliary information for particular PHI or select operands. 00454 SmallDenseMap<Use *, std::pair<unsigned, unsigned>, 4> PHIOrSelectOpMap; 00455 00456 /// \brief A utility routine called from the constructor. 00457 /// 00458 /// This does what it says on the tin. It is the key of the alloca partition 00459 /// splitting and merging. After it is called we have the desired disjoint 00460 /// collection of partitions. 00461 void splitAndMergePartitions(); 00462 }; 00463 } 00464 00465 static Value *foldSelectInst(SelectInst &SI) { 00466 // If the condition being selected on is a constant or the same value is 00467 // being selected between, fold the select. Yes this does (rarely) happen 00468 // early on. 00469 if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition())) 00470 return SI.getOperand(1+CI->isZero()); 00471 if (SI.getOperand(1) == SI.getOperand(2)) 00472 return SI.getOperand(1); 00473 00474 return 0; 00475 } 00476 00477 /// \brief Builder for the alloca partitioning. 00478 /// 00479 /// This class builds an alloca partitioning by recursively visiting the uses 00480 /// of an alloca and splitting the partitions for each load and store at each 00481 /// offset. 00482 class AllocaPartitioning::PartitionBuilder 00483 : public PtrUseVisitor<PartitionBuilder> { 00484 friend class PtrUseVisitor<PartitionBuilder>; 00485 friend class InstVisitor<PartitionBuilder>; 00486 typedef PtrUseVisitor<PartitionBuilder> Base; 00487 00488 const uint64_t AllocSize; 00489 AllocaPartitioning &P; 00490 00491 SmallDenseMap<Instruction *, unsigned> MemTransferPartitionMap; 00492 00493 public: 00494 PartitionBuilder(const DataLayout &DL, AllocaInst &AI, AllocaPartitioning &P) 00495 : PtrUseVisitor<PartitionBuilder>(DL), 00496 AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), 00497 P(P) {} 00498 00499 private: 00500 void insertUse(Instruction &I, const APInt &Offset, uint64_t Size, 00501 bool IsSplittable = false) { 00502 // Completely skip uses which have a zero size or start either before or 00503 // past the end of the allocation. 00504 if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) { 00505 DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset 00506 << " which has zero size or starts outside of the " 00507 << AllocSize << " byte alloca:\n" 00508 << " alloca: " << P.AI << "\n" 00509 << " use: " << I << "\n"); 00510 return; 00511 } 00512 00513 uint64_t BeginOffset = Offset.getZExtValue(); 00514 uint64_t EndOffset = BeginOffset + Size; 00515 00516 // Clamp the end offset to the end of the allocation. Note that this is 00517 // formulated to handle even the case where "BeginOffset + Size" overflows. 00518 // This may appear superficially to be something we could ignore entirely, 00519 // but that is not so! There may be widened loads or PHI-node uses where 00520 // some instructions are dead but not others. We can't completely ignore 00521 // them, and so have to record at least the information here. 00522 assert(AllocSize >= BeginOffset); // Established above. 00523 if (Size > AllocSize - BeginOffset) { 00524 DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset 00525 << " to remain within the " << AllocSize << " byte alloca:\n" 00526 << " alloca: " << P.AI << "\n" 00527 << " use: " << I << "\n"); 00528 EndOffset = AllocSize; 00529 } 00530 00531 Partition New(BeginOffset, EndOffset, IsSplittable); 00532 P.Partitions.push_back(New); 00533 } 00534 00535 void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset, 00536 uint64_t Size, bool IsVolatile) { 00537 // We allow splitting of loads and stores where the type is an integer type 00538 // and cover the entire alloca. This prevents us from splitting over 00539 // eagerly. 00540 // FIXME: In the great blue eventually, we should eagerly split all integer 00541 // loads and stores, and then have a separate step that merges adjacent 00542 // alloca partitions into a single partition suitable for integer widening. 00543 // Or we should skip the merge step and rely on GVN and other passes to 00544 // merge adjacent loads and stores that survive mem2reg. 00545 bool IsSplittable = 00546 Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize; 00547 00548 insertUse(I, Offset, Size, IsSplittable); 00549 } 00550 00551 void visitLoadInst(LoadInst &LI) { 00552 assert((!LI.isSimple() || LI.getType()->isSingleValueType()) && 00553 "All simple FCA loads should have been pre-split"); 00554 00555 if (!IsOffsetKnown) 00556 return PI.setAborted(&LI); 00557 00558 uint64_t Size = DL.getTypeStoreSize(LI.getType()); 00559 return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); 00560 } 00561 00562 void visitStoreInst(StoreInst &SI) { 00563 Value *ValOp = SI.getValueOperand(); 00564 if (ValOp == *U) 00565 return PI.setEscapedAndAborted(&SI); 00566 if (!IsOffsetKnown) 00567 return PI.setAborted(&SI); 00568 00569 uint64_t Size = DL.getTypeStoreSize(ValOp->getType()); 00570 00571 // If this memory access can be shown to *statically* extend outside the 00572 // bounds of of the allocation, it's behavior is undefined, so simply 00573 // ignore it. Note that this is more strict than the generic clamping 00574 // behavior of insertUse. We also try to handle cases which might run the 00575 // risk of overflow. 00576 // FIXME: We should instead consider the pointer to have escaped if this 00577 // function is being instrumented for addressing bugs or race conditions. 00578 if (Offset.isNegative() || Size > AllocSize || 00579 Offset.ugt(AllocSize - Size)) { 00580 DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset 00581 << " which extends past the end of the " << AllocSize 00582 << " byte alloca:\n" 00583 << " alloca: " << P.AI << "\n" 00584 << " use: " << SI << "\n"); 00585 return; 00586 } 00587 00588 assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && 00589 "All simple FCA stores should have been pre-split"); 00590 handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); 00591 } 00592 00593 00594 void visitMemSetInst(MemSetInst &II) { 00595 assert(II.getRawDest() == *U && "Pointer use is not the destination?"); 00596 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); 00597 if ((Length && Length->getValue() == 0) || 00598 (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize))) 00599 // Zero-length mem transfer intrinsics can be ignored entirely. 00600 return; 00601 00602 if (!IsOffsetKnown) 00603 return PI.setAborted(&II); 00604 00605 insertUse(II, Offset, 00606 Length ? Length->getLimitedValue() 00607 : AllocSize - Offset.getLimitedValue(), 00608 (bool)Length); 00609 } 00610 00611 void visitMemTransferInst(MemTransferInst &II) { 00612 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); 00613 if ((Length && Length->getValue() == 0) || 00614 (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize))) 00615 // Zero-length mem transfer intrinsics can be ignored entirely. 00616 return; 00617 00618 if (!IsOffsetKnown) 00619 return PI.setAborted(&II); 00620 00621 uint64_t RawOffset = Offset.getLimitedValue(); 00622 uint64_t Size = Length ? Length->getLimitedValue() 00623 : AllocSize - RawOffset; 00624 00625 MemTransferOffsets &Offsets = P.MemTransferInstData[&II]; 00626 00627 // Only intrinsics with a constant length can be split. 00628 Offsets.IsSplittable = Length; 00629 00630 if (*U == II.getRawDest()) { 00631 Offsets.DestBegin = RawOffset; 00632 Offsets.DestEnd = RawOffset + Size; 00633 } 00634 if (*U == II.getRawSource()) { 00635 Offsets.SourceBegin = RawOffset; 00636 Offsets.SourceEnd = RawOffset + Size; 00637 } 00638 00639 // If we have set up end offsets for both the source and the destination, 00640 // we have found both sides of this transfer pointing at the same alloca. 00641 bool SeenBothEnds = Offsets.SourceEnd && Offsets.DestEnd; 00642 if (SeenBothEnds && II.getRawDest() != II.getRawSource()) { 00643 unsigned PrevIdx = MemTransferPartitionMap[&II]; 00644 00645 // Check if the begin offsets match and this is a non-volatile transfer. 00646 // In that case, we can completely elide the transfer. 00647 if (!II.isVolatile() && Offsets.SourceBegin == Offsets.DestBegin) { 00648 P.Partitions[PrevIdx].kill(); 00649 return; 00650 } 00651 00652 // Otherwise we have an offset transfer within the same alloca. We can't 00653 // split those. 00654 P.Partitions[PrevIdx].IsSplittable = Offsets.IsSplittable = false; 00655 } else if (SeenBothEnds) { 00656 // Handle the case where this exact use provides both ends of the 00657 // operation. 00658 assert(II.getRawDest() == II.getRawSource()); 00659 00660 // For non-volatile transfers this is a no-op. 00661 if (!II.isVolatile()) 00662 return; 00663 00664 // Otherwise just suppress splitting. 00665 Offsets.IsSplittable = false; 00666 } 00667 00668 00669 // Insert the use now that we've fixed up the splittable nature. 00670 insertUse(II, Offset, Size, Offsets.IsSplittable); 00671 00672 // Setup the mapping from intrinsic to partition of we've not seen both 00673 // ends of this transfer. 00674 if (!SeenBothEnds) { 00675 unsigned NewIdx = P.Partitions.size() - 1; 00676 bool Inserted 00677 = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx)).second; 00678 assert(Inserted && 00679 "Already have intrinsic in map but haven't seen both ends"); 00680 (void)Inserted; 00681 } 00682 } 00683 00684 // Disable SRoA for any intrinsics except for lifetime invariants. 00685 // FIXME: What about debug intrinsics? This matches old behavior, but 00686 // doesn't make sense. 00687 void visitIntrinsicInst(IntrinsicInst &II) { 00688 if (!IsOffsetKnown) 00689 return PI.setAborted(&II); 00690 00691 if (II.getIntrinsicID() == Intrinsic::lifetime_start || 00692 II.getIntrinsicID() == Intrinsic::lifetime_end) { 00693 ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0)); 00694 uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(), 00695 Length->getLimitedValue()); 00696 insertUse(II, Offset, Size, true); 00697 return; 00698 } 00699 00700 Base::visitIntrinsicInst(II); 00701 } 00702 00703 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) { 00704 // We consider any PHI or select that results in a direct load or store of 00705 // the same offset to be a viable use for partitioning purposes. These uses 00706 // are considered unsplittable and the size is the maximum loaded or stored 00707 // size. 00708 SmallPtrSet<Instruction *, 4> Visited; 00709 SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses; 00710 Visited.insert(Root); 00711 Uses.push_back(std::make_pair(cast<Instruction>(*U), Root)); 00712 // If there are no loads or stores, the access is dead. We mark that as 00713 // a size zero access. 00714 Size = 0; 00715 do { 00716 Instruction *I, *UsedI; 00717 llvm::tie(UsedI, I) = Uses.pop_back_val(); 00718 00719 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 00720 Size = std::max(Size, DL.getTypeStoreSize(LI->getType())); 00721 continue; 00722 } 00723 if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 00724 Value *Op = SI->getOperand(0); 00725 if (Op == UsedI) 00726 return SI; 00727 Size = std::max(Size, DL.getTypeStoreSize(Op->getType())); 00728 continue; 00729 } 00730 00731 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { 00732 if (!GEP->hasAllZeroIndices()) 00733 return GEP; 00734 } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) && 00735 !isa<SelectInst>(I)) { 00736 return I; 00737 } 00738 00739 for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; 00740 ++UI) 00741 if (Visited.insert(cast<Instruction>(*UI))) 00742 Uses.push_back(std::make_pair(I, cast<Instruction>(*UI))); 00743 } while (!Uses.empty()); 00744 00745 return 0; 00746 } 00747 00748 void visitPHINode(PHINode &PN) { 00749 if (PN.use_empty()) 00750 return; 00751 if (!IsOffsetKnown) 00752 return PI.setAborted(&PN); 00753 00754 // See if we already have computed info on this node. 00755 std::pair<uint64_t, bool> &PHIInfo = P.PHIOrSelectSizes[&PN]; 00756 if (PHIInfo.first) { 00757 PHIInfo.second = true; 00758 insertUse(PN, Offset, PHIInfo.first); 00759 return; 00760 } 00761 00762 // Check for an unsafe use of the PHI node. 00763 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first)) 00764 return PI.setAborted(UnsafeI); 00765 00766 insertUse(PN, Offset, PHIInfo.first); 00767 } 00768 00769 void visitSelectInst(SelectInst &SI) { 00770 if (SI.use_empty()) 00771 return; 00772 if (Value *Result = foldSelectInst(SI)) { 00773 if (Result == *U) 00774 // If the result of the constant fold will be the pointer, recurse 00775 // through the select as if we had RAUW'ed it. 00776 enqueueUsers(SI); 00777 00778 return; 00779 } 00780 if (!IsOffsetKnown) 00781 return PI.setAborted(&SI); 00782 00783 // See if we already have computed info on this node. 00784 std::pair<uint64_t, bool> &SelectInfo = P.PHIOrSelectSizes[&SI]; 00785 if (SelectInfo.first) { 00786 SelectInfo.second = true; 00787 insertUse(SI, Offset, SelectInfo.first); 00788 return; 00789 } 00790 00791 // Check for an unsafe use of the PHI node. 00792 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first)) 00793 return PI.setAborted(UnsafeI); 00794 00795 insertUse(SI, Offset, SelectInfo.first); 00796 } 00797 00798 /// \brief Disable SROA entirely if there are unhandled users of the alloca. 00799 void visitInstruction(Instruction &I) { 00800 PI.setAborted(&I); 00801 } 00802 }; 00803 00804 /// \brief Use adder for the alloca partitioning. 00805 /// 00806 /// This class adds the uses of an alloca to all of the partitions which they 00807 /// use. For splittable partitions, this can end up doing essentially a linear 00808 /// walk of the partitions, but the number of steps remains bounded by the 00809 /// total result instruction size: 00810 /// - The number of partitions is a result of the number unsplittable 00811 /// instructions using the alloca. 00812 /// - The number of users of each partition is at worst the total number of 00813 /// splittable instructions using the alloca. 00814 /// Thus we will produce N * M instructions in the end, where N are the number 00815 /// of unsplittable uses and M are the number of splittable. This visitor does 00816 /// the exact same number of updates to the partitioning. 00817 /// 00818 /// In the more common case, this visitor will leverage the fact that the 00819 /// partition space is pre-sorted, and do a logarithmic search for the 00820 /// partition needed, making the total visit a classical ((N + M) * log(N)) 00821 /// complexity operation. 00822 class AllocaPartitioning::UseBuilder : public PtrUseVisitor<UseBuilder> { 00823 friend class PtrUseVisitor<UseBuilder>; 00824 friend class InstVisitor<UseBuilder>; 00825 typedef PtrUseVisitor<UseBuilder> Base; 00826 00827 const uint64_t AllocSize; 00828 AllocaPartitioning &P; 00829 00830 /// \brief Set to de-duplicate dead instructions found in the use walk. 00831 SmallPtrSet<Instruction *, 4> VisitedDeadInsts; 00832 00833 public: 00834 UseBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P) 00835 : PtrUseVisitor<UseBuilder>(TD), 00836 AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())), 00837 P(P) {} 00838 00839 private: 00840 void markAsDead(Instruction &I) { 00841 if (VisitedDeadInsts.insert(&I)) 00842 P.DeadUsers.push_back(&I); 00843 } 00844 00845 void insertUse(Instruction &User, const APInt &Offset, uint64_t Size) { 00846 // If the use has a zero size or extends outside of the allocation, record 00847 // it as a dead use for elimination later. 00848 if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) 00849 return markAsDead(User); 00850 00851 uint64_t BeginOffset = Offset.getZExtValue(); 00852 uint64_t EndOffset = BeginOffset + Size; 00853 00854 // Clamp the end offset to the end of the allocation. Note that this is 00855 // formulated to handle even the case where "BeginOffset + Size" overflows. 00856 assert(AllocSize >= BeginOffset); // Established above. 00857 if (Size > AllocSize - BeginOffset) 00858 EndOffset = AllocSize; 00859 00860 // NB: This only works if we have zero overlapping partitions. 00861 iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset); 00862 if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset) 00863 I = llvm::prior(I); 00864 iterator E = P.end(); 00865 bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset; 00866 for (; I != E && I->BeginOffset < EndOffset; ++I) { 00867 PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset), 00868 std::min(I->EndOffset, EndOffset), U, IsSplit); 00869 P.use_push_back(I, NewPU); 00870 if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser())) 00871 P.PHIOrSelectOpMap[U] 00872 = std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1); 00873 } 00874 } 00875 00876 void visitBitCastInst(BitCastInst &BC) { 00877 if (BC.use_empty()) 00878 return markAsDead(BC); 00879 00880 return Base::visitBitCastInst(BC); 00881 } 00882 00883 void visitGetElementPtrInst(GetElementPtrInst &GEPI) { 00884 if (GEPI.use_empty()) 00885 return markAsDead(GEPI); 00886 00887 return Base::visitGetElementPtrInst(GEPI); 00888 } 00889 00890 void visitLoadInst(LoadInst &LI) { 00891 assert(IsOffsetKnown); 00892 uint64_t Size = DL.getTypeStoreSize(LI.getType()); 00893 insertUse(LI, Offset, Size); 00894 } 00895 00896 void visitStoreInst(StoreInst &SI) { 00897 assert(IsOffsetKnown); 00898 uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType()); 00899 00900 // If this memory access can be shown to *statically* extend outside the 00901 // bounds of of the allocation, it's behavior is undefined, so simply 00902 // ignore it. Note that this is more strict than the generic clamping 00903 // behavior of insertUse. 00904 if (Offset.isNegative() || Size > AllocSize || 00905 Offset.ugt(AllocSize - Size)) 00906 return markAsDead(SI); 00907 00908 insertUse(SI, Offset, Size); 00909 } 00910 00911 void visitMemSetInst(MemSetInst &II) { 00912 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); 00913 if ((Length && Length->getValue() == 0) || 00914 (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize))) 00915 return markAsDead(II); 00916 00917 assert(IsOffsetKnown); 00918 insertUse(II, Offset, Length ? Length->getLimitedValue() 00919 : AllocSize - Offset.getLimitedValue()); 00920 } 00921 00922 void visitMemTransferInst(MemTransferInst &II) { 00923 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); 00924 if ((Length && Length->getValue() == 0) || 00925 (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize))) 00926 return markAsDead(II); 00927 00928 assert(IsOffsetKnown); 00929 uint64_t Size = Length ? Length->getLimitedValue() 00930 : AllocSize - Offset.getLimitedValue(); 00931 00932 const MemTransferOffsets &Offsets = P.MemTransferInstData[&II]; 00933 if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd && 00934 Offsets.DestBegin == Offsets.SourceBegin) 00935 return markAsDead(II); // Skip identity transfers without side-effects. 00936 00937 insertUse(II, Offset, Size); 00938 } 00939 00940 void visitIntrinsicInst(IntrinsicInst &II) { 00941 assert(IsOffsetKnown); 00942 assert(II.getIntrinsicID() == Intrinsic::lifetime_start || 00943 II.getIntrinsicID() == Intrinsic::lifetime_end); 00944 00945 ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0)); 00946 insertUse(II, Offset, std::min(Length->getLimitedValue(), 00947 AllocSize - Offset.getLimitedValue())); 00948 } 00949 00950 void insertPHIOrSelect(Instruction &User, const APInt &Offset) { 00951 uint64_t Size = P.PHIOrSelectSizes.lookup(&User).first; 00952 00953 // For PHI and select operands outside the alloca, we can't nuke the entire 00954 // phi or select -- the other side might still be relevant, so we special 00955 // case them here and use a separate structure to track the operands 00956 // themselves which should be replaced with undef. 00957 if ((Offset.isNegative() && Offset.uge(Size)) || 00958 (!Offset.isNegative() && Offset.uge(AllocSize))) { 00959 P.DeadOperands.push_back(U); 00960 return; 00961 } 00962 00963 insertUse(User, Offset, Size); 00964 } 00965 00966 void visitPHINode(PHINode &PN) { 00967 if (PN.use_empty()) 00968 return markAsDead(PN); 00969 00970 assert(IsOffsetKnown); 00971 insertPHIOrSelect(PN, Offset); 00972 } 00973 00974 void visitSelectInst(SelectInst &SI) { 00975 if (SI.use_empty()) 00976 return markAsDead(SI); 00977 00978 if (Value *Result = foldSelectInst(SI)) { 00979 if (Result == *U) 00980 // If the result of the constant fold will be the pointer, recurse 00981 // through the select as if we had RAUW'ed it. 00982 enqueueUsers(SI); 00983 else 00984 // Otherwise the operand to the select is dead, and we can replace it 00985 // with undef. 00986 P.DeadOperands.push_back(U); 00987 00988 return; 00989 } 00990 00991 assert(IsOffsetKnown); 00992 insertPHIOrSelect(SI, Offset); 00993 } 00994 00995 /// \brief Unreachable, we've already visited the alloca once. 00996 void visitInstruction(Instruction &I) { 00997 llvm_unreachable("Unhandled instruction in use builder."); 00998 } 00999 }; 01000 01001 void AllocaPartitioning::splitAndMergePartitions() { 01002 size_t NumDeadPartitions = 0; 01003 01004 // Track the range of splittable partitions that we pass when accumulating 01005 // overlapping unsplittable partitions. 01006 uint64_t SplitEndOffset = 0ull; 01007 01008 Partition New(0ull, 0ull, false); 01009 01010 for (unsigned i = 0, j = i, e = Partitions.size(); i != e; i = j) { 01011 ++j; 01012 01013 if (!Partitions[i].IsSplittable || New.BeginOffset == New.EndOffset) { 01014 assert(New.BeginOffset == New.EndOffset); 01015 New = Partitions[i]; 01016 } else { 01017 assert(New.IsSplittable); 01018 New.EndOffset = std::max(New.EndOffset, Partitions[i].EndOffset); 01019 } 01020 assert(New.BeginOffset != New.EndOffset); 01021 01022 // Scan the overlapping partitions. 01023 while (j != e && New.EndOffset > Partitions[j].BeginOffset) { 01024 // If the new partition we are forming is splittable, stop at the first 01025 // unsplittable partition. 01026 if (New.IsSplittable && !Partitions[j].IsSplittable) 01027 break; 01028 01029 // Grow the new partition to include any equally splittable range. 'j' is 01030 // always equally splittable when New is splittable, but when New is not 01031 // splittable, we may subsume some (or part of some) splitable partition 01032 // without growing the new one. 01033 if (New.IsSplittable == Partitions[j].IsSplittable) { 01034 New.EndOffset = std::max(New.EndOffset, Partitions[j].EndOffset); 01035 } else { 01036 assert(!New.IsSplittable); 01037 assert(Partitions[j].IsSplittable); 01038 SplitEndOffset = std::max(SplitEndOffset, Partitions[j].EndOffset); 01039 } 01040 01041 Partitions[j].kill(); 01042 ++NumDeadPartitions; 01043 ++j; 01044 } 01045 01046 // If the new partition is splittable, chop off the end as soon as the 01047 // unsplittable subsequent partition starts and ensure we eventually cover 01048 // the splittable area. 01049 if (j != e && New.IsSplittable) { 01050 SplitEndOffset = std::max(SplitEndOffset, New.EndOffset); 01051 New.EndOffset = std::min(New.EndOffset, Partitions[j].BeginOffset); 01052 } 01053 01054 // Add the new partition if it differs from the original one and is 01055 // non-empty. We can end up with an empty partition here if it was 01056 // splittable but there is an unsplittable one that starts at the same 01057 // offset. 01058 if (New != Partitions[i]) { 01059 if (New.BeginOffset != New.EndOffset) 01060 Partitions.push_back(New); 01061 // Mark the old one for removal. 01062 Partitions[i].kill(); 01063 ++NumDeadPartitions; 01064 } 01065 01066 New.BeginOffset = New.EndOffset; 01067 if (!New.IsSplittable) { 01068 New.EndOffset = std::max(New.EndOffset, SplitEndOffset); 01069 if (j != e && !Partitions[j].IsSplittable) 01070 New.EndOffset = std::min(New.EndOffset, Partitions[j].BeginOffset); 01071 New.IsSplittable = true; 01072 // If there is a trailing splittable partition which won't be fused into 01073 // the next splittable partition go ahead and add it onto the partitions 01074 // list. 01075 if (New.BeginOffset < New.EndOffset && 01076 (j == e || !Partitions[j].IsSplittable || 01077 New.EndOffset < Partitions[j].BeginOffset)) { 01078 Partitions.push_back(New); 01079 New.BeginOffset = New.EndOffset = 0ull; 01080 } 01081 } 01082 } 01083 01084 // Re-sort the partitions now that they have been split and merged into 01085 // disjoint set of partitions. Also remove any of the dead partitions we've 01086 // replaced in the process. 01087 std::sort(Partitions.begin(), Partitions.end()); 01088 if (NumDeadPartitions) { 01089 assert(Partitions.back().isDead()); 01090 assert((ptrdiff_t)NumDeadPartitions == 01091 std::count(Partitions.begin(), Partitions.end(), Partitions.back())); 01092 } 01093 Partitions.erase(Partitions.end() - NumDeadPartitions, Partitions.end()); 01094 } 01095 01096 AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) 01097 : 01098 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 01099 AI(AI), 01100 #endif 01101 PointerEscapingInstr(0) { 01102 PartitionBuilder PB(TD, AI, *this); 01103 PartitionBuilder::PtrInfo PtrI = PB.visitPtr(AI); 01104 if (PtrI.isEscaped() || PtrI.isAborted()) { 01105 // FIXME: We should sink the escape vs. abort info into the caller nicely, 01106 // possibly by just storing the PtrInfo in the AllocaPartitioning. 01107 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst() 01108 : PtrI.getAbortingInst(); 01109 assert(PointerEscapingInstr && "Did not track a bad instruction"); 01110 return; 01111 } 01112 01113 // Sort the uses. This arranges for the offsets to be in ascending order, 01114 // and the sizes to be in descending order. 01115 std::sort(Partitions.begin(), Partitions.end()); 01116 01117 // Remove any partitions from the back which are marked as dead. 01118 while (!Partitions.empty() && Partitions.back().isDead()) 01119 Partitions.pop_back(); 01120 01121 if (Partitions.size() > 1) { 01122 // Intersect splittability for all partitions with equal offsets and sizes. 01123 // Then remove all but the first so that we have a sequence of non-equal but 01124 // potentially overlapping partitions. 01125 for (iterator I = Partitions.begin(), J = I, E = Partitions.end(); I != E; 01126 I = J) { 01127 ++J; 01128 while (J != E && *I == *J) { 01129 I->IsSplittable &= J->IsSplittable; 01130 ++J; 01131 } 01132 } 01133 Partitions.erase(std::unique(Partitions.begin(), Partitions.end()), 01134 Partitions.end()); 01135 01136 // Split splittable and merge unsplittable partitions into a disjoint set 01137 // of partitions over the used space of the allocation. 01138 splitAndMergePartitions(); 01139 } 01140 01141 // Record how many partitions we end up with. 01142 NumAllocaPartitions += Partitions.size(); 01143 MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(), MaxPartitionsPerAlloca); 01144 01145 // Now build up the user lists for each of these disjoint partitions by 01146 // re-walking the recursive users of the alloca. 01147 Uses.resize(Partitions.size()); 01148 UseBuilder UB(TD, AI, *this); 01149 PtrI = UB.visitPtr(AI); 01150 assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!"); 01151 assert(!PtrI.isAborted() && "Early aborted the visit of the pointer."); 01152 01153 unsigned NumUses = 0; 01154 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) 01155 for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx) 01156 NumUses += Uses[Idx].size(); 01157 #endif 01158 NumAllocaPartitionUses += NumUses; 01159 MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses, MaxPartitionUsesPerAlloca); 01160 } 01161 01162 Type *AllocaPartitioning::getCommonType(iterator I) const { 01163 Type *Ty = 0; 01164 for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { 01165 Use *U = UI->getUse(); 01166 if (!U) 01167 continue; // Skip dead uses. 01168 if (isa<IntrinsicInst>(*U->getUser())) 01169 continue; 01170 if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset) 01171 continue; 01172 01173 Type *UserTy = 0; 01174 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) 01175 UserTy = LI->getType(); 01176 else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) 01177 UserTy = SI->getValueOperand()->getType(); 01178 else 01179 return 0; // Bail if we have weird uses. 01180 01181 if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) { 01182 // If the type is larger than the partition, skip it. We only encounter 01183 // this for split integer operations where we want to use the type of the 01184 // entity causing the split. 01185 if (ITy->getBitWidth() > (I->EndOffset - I->BeginOffset)*8) 01186 continue; 01187 01188 // If we have found an integer type use covering the alloca, use that 01189 // regardless of the other types, as integers are often used for a "bucket 01190 // of bits" type. 01191 return ITy; 01192 } 01193 01194 if (Ty && Ty != UserTy) 01195 return 0; 01196 01197 Ty = UserTy; 01198 } 01199 return Ty; 01200 } 01201 01202 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 01203 01204 void AllocaPartitioning::print(raw_ostream &OS, const_iterator I, 01205 StringRef Indent) const { 01206 OS << Indent << "partition #" << (I - begin()) 01207 << " [" << I->BeginOffset << "," << I->EndOffset << ")" 01208 << (I->IsSplittable ? " (splittable)" : "") 01209 << (Uses[I - begin()].empty() ? " (zero uses)" : "") 01210 << "\n"; 01211 } 01212 01213 void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I, 01214 StringRef Indent) const { 01215 for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { 01216 if (!UI->getUse()) 01217 continue; // Skip dead uses. 01218 OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") " 01219 << "used by: " << *UI->getUse()->getUser() << "\n"; 01220 if (MemTransferInst *II = 01221 dyn_cast<MemTransferInst>(UI->getUse()->getUser())) { 01222 const MemTransferOffsets &MTO = MemTransferInstData.lookup(II); 01223 bool IsDest; 01224 if (!MTO.IsSplittable) 01225 IsDest = UI->BeginOffset == MTO.DestBegin; 01226 else 01227 IsDest = MTO.DestBegin != 0u; 01228 OS << Indent << " (original " << (IsDest ? "dest" : "source") << ": " 01229 << "[" << (IsDest ? MTO.DestBegin : MTO.SourceBegin) 01230 << "," << (IsDest ? MTO.DestEnd : MTO.SourceEnd) << ")\n"; 01231 } 01232 } 01233 } 01234 01235 void AllocaPartitioning::print(raw_ostream &OS) const { 01236 if (PointerEscapingInstr) { 01237 OS << "No partitioning for alloca: " << AI << "\n" 01238 << " A pointer to this alloca escaped by:\n" 01239 << " " << *PointerEscapingInstr << "\n"; 01240 return; 01241 } 01242 01243 OS << "Partitioning of alloca: " << AI << "\n"; 01244 for (const_iterator I = begin(), E = end(); I != E; ++I) { 01245 print(OS, I); 01246 printUsers(OS, I); 01247 } 01248 } 01249 01250 void AllocaPartitioning::dump(const_iterator I) const { print(dbgs(), I); } 01251 void AllocaPartitioning::dump() const { print(dbgs()); } 01252 01253 #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 01254 01255 01256 namespace { 01257 /// \brief Implementation of LoadAndStorePromoter for promoting allocas. 01258 /// 01259 /// This subclass of LoadAndStorePromoter adds overrides to handle promoting 01260 /// the loads and stores of an alloca instruction, as well as updating its 01261 /// debug information. This is used when a domtree is unavailable and thus 01262 /// mem2reg in its full form can't be used to handle promotion of allocas to 01263 /// scalar values. 01264 class AllocaPromoter : public LoadAndStorePromoter { 01265 AllocaInst &AI; 01266 DIBuilder &DIB; 01267 01268 SmallVector<DbgDeclareInst *, 4> DDIs; 01269 SmallVector<DbgValueInst *, 4> DVIs; 01270 01271 public: 01272 AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, 01273 AllocaInst &AI, DIBuilder &DIB) 01274 : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {} 01275 01276 void run(const SmallVectorImpl<Instruction*> &Insts) { 01277 // Remember which alloca we're promoting (for isInstInList). 01278 if (MDNode *DebugNode = MDNode::getIfExists(AI.getContext(), &AI)) { 01279 for (Value::use_iterator UI = DebugNode->use_begin(), 01280 UE = DebugNode->use_end(); 01281 UI != UE; ++UI) 01282 if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) 01283 DDIs.push_back(DDI); 01284 else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI)) 01285 DVIs.push_back(DVI); 01286 } 01287 01288 LoadAndStorePromoter::run(Insts); 01289 AI.eraseFromParent(); 01290 while (!DDIs.empty()) 01291 DDIs.pop_back_val()->eraseFromParent(); 01292 while (!DVIs.empty()) 01293 DVIs.pop_back_val()->eraseFromParent(); 01294 } 01295 01296 virtual bool isInstInList(Instruction *I, 01297 const SmallVectorImpl<Instruction*> &Insts) const { 01298 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 01299 return LI->getOperand(0) == &AI; 01300 return cast<StoreInst>(I)->getPointerOperand() == &AI; 01301 } 01302 01303 virtual void updateDebugInfo(Instruction *Inst) const { 01304 for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(), 01305 E = DDIs.end(); I != E; ++I) { 01306 DbgDeclareInst *DDI = *I; 01307 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) 01308 ConvertDebugDeclareToDebugValue(DDI, SI, DIB); 01309 else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) 01310 ConvertDebugDeclareToDebugValue(DDI, LI, DIB); 01311 } 01312 for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(), 01313 E = DVIs.end(); I != E; ++I) { 01314 DbgValueInst *DVI = *I; 01315 Value *Arg = 0; 01316 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 01317 // If an argument is zero extended then use argument directly. The ZExt 01318 // may be zapped by an optimization pass in future. 01319 if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) 01320 Arg = dyn_cast<Argument>(ZExt->getOperand(0)); 01321 else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) 01322 Arg = dyn_cast<Argument>(SExt->getOperand(0)); 01323 if (!Arg) 01324 Arg = SI->getValueOperand(); 01325 } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { 01326 Arg = LI->getPointerOperand(); 01327 } else { 01328 continue; 01329 } 01330 Instruction *DbgVal = 01331 DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()), 01332 Inst); 01333 DbgVal->setDebugLoc(DVI->getDebugLoc()); 01334 } 01335 } 01336 }; 01337 } // end anon namespace 01338 01339 01340 namespace { 01341 /// \brief An optimization pass providing Scalar Replacement of Aggregates. 01342 /// 01343 /// This pass takes allocations which can be completely analyzed (that is, they 01344 /// don't escape) and tries to turn them into scalar SSA values. There are 01345 /// a few steps to this process. 01346 /// 01347 /// 1) It takes allocations of aggregates and analyzes the ways in which they 01348 /// are used to try to split them into smaller allocations, ideally of 01349 /// a single scalar data type. It will split up memcpy and memset accesses 01350 /// as necessary and try to isolate individual scalar accesses. 01351 /// 2) It will transform accesses into forms which are suitable for SSA value 01352 /// promotion. This can be replacing a memset with a scalar store of an 01353 /// integer value, or it can involve speculating operations on a PHI or 01354 /// select to be a PHI or select of the results. 01355 /// 3) Finally, this will try to detect a pattern of accesses which map cleanly 01356 /// onto insert and extract operations on a vector value, and convert them to 01357 /// this form. By doing so, it will enable promotion of vector aggregates to 01358 /// SSA vector values. 01359 class SROA : public FunctionPass { 01360 const bool RequiresDomTree; 01361 01362 LLVMContext *C; 01363 const DataLayout *TD; 01364 DominatorTree *DT; 01365 01366 /// \brief Worklist of alloca instructions to simplify. 01367 /// 01368 /// Each alloca in the function is added to this. Each new alloca formed gets 01369 /// added to it as well to recursively simplify unless that alloca can be 01370 /// directly promoted. Finally, each time we rewrite a use of an alloca other 01371 /// the one being actively rewritten, we add it back onto the list if not 01372 /// already present to ensure it is re-visited. 01373 SetVector<AllocaInst *, SmallVector<AllocaInst *, 16> > Worklist; 01374 01375 /// \brief A collection of instructions to delete. 01376 /// We try to batch deletions to simplify code and make things a bit more 01377 /// efficient. 01378 SetVector<Instruction *, SmallVector<Instruction *, 8> > DeadInsts; 01379 01380 /// \brief Post-promotion worklist. 01381 /// 01382 /// Sometimes we discover an alloca which has a high probability of becoming 01383 /// viable for SROA after a round of promotion takes place. In those cases, 01384 /// the alloca is enqueued here for re-processing. 01385 /// 01386 /// Note that we have to be very careful to clear allocas out of this list in 01387 /// the event they are deleted. 01388 SetVector<AllocaInst *, SmallVector<AllocaInst *, 16> > PostPromotionWorklist; 01389 01390 /// \brief A collection of alloca instructions we can directly promote. 01391 std::vector<AllocaInst *> PromotableAllocas; 01392 01393 public: 01394 SROA(bool RequiresDomTree = true) 01395 : FunctionPass(ID), RequiresDomTree(RequiresDomTree), 01396 C(0), TD(0), DT(0) { 01397 initializeSROAPass(*PassRegistry::getPassRegistry()); 01398 } 01399 bool runOnFunction(Function &F); 01400 void getAnalysisUsage(AnalysisUsage &AU) const; 01401 01402 const char *getPassName() const { return "SROA"; } 01403 static char ID; 01404 01405 private: 01406 friend class PHIOrSelectSpeculator; 01407 friend class AllocaPartitionRewriter; 01408 friend class AllocaPartitionVectorRewriter; 01409 01410 bool rewriteAllocaPartition(AllocaInst &AI, 01411 AllocaPartitioning &P, 01412 AllocaPartitioning::iterator PI); 01413 bool splitAlloca(AllocaInst &AI, AllocaPartitioning &P); 01414 bool runOnAlloca(AllocaInst &AI); 01415 void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas); 01416 bool promoteAllocas(Function &F); 01417 }; 01418 } 01419 01420 char SROA::ID = 0; 01421 01422 FunctionPass *llvm::createSROAPass(bool RequiresDomTree) { 01423 return new SROA(RequiresDomTree); 01424 } 01425 01426 INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates", 01427 false, false) 01428 INITIALIZE_PASS_DEPENDENCY(DominatorTree) 01429 INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates", 01430 false, false) 01431 01432 namespace { 01433 /// \brief Visitor to speculate PHIs and Selects where possible. 01434 class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> { 01435 // Befriend the base class so it can delegate to private visit methods. 01436 friend class llvm::InstVisitor<PHIOrSelectSpeculator>; 01437 01438 const DataLayout &TD; 01439 AllocaPartitioning &P; 01440 SROA &Pass; 01441 01442 public: 01443 PHIOrSelectSpeculator(const DataLayout &TD, AllocaPartitioning &P, SROA &Pass) 01444 : TD(TD), P(P), Pass(Pass) {} 01445 01446 /// \brief Visit the users of an alloca partition and rewrite them. 01447 void visitUsers(AllocaPartitioning::const_iterator PI) { 01448 // Note that we need to use an index here as the underlying vector of uses 01449 // may be grown during speculation. However, we never need to re-visit the 01450 // new uses, and so we can use the initial size bound. 01451 for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) { 01452 const PartitionUse &PU = P.getUse(PI, Idx); 01453 if (!PU.getUse()) 01454 continue; // Skip dead use. 01455 01456 visit(cast<Instruction>(PU.getUse()->getUser())); 01457 } 01458 } 01459 01460 private: 01461 // By default, skip this instruction. 01462 void visitInstruction(Instruction &I) {} 01463 01464 /// PHI instructions that use an alloca and are subsequently loaded can be 01465 /// rewritten to load both input pointers in the pred blocks and then PHI the 01466 /// results, allowing the load of the alloca to be promoted. 01467 /// From this: 01468 /// %P2 = phi [i32* %Alloca, i32* %Other] 01469 /// %V = load i32* %P2 01470 /// to: 01471 /// %V1 = load i32* %Alloca -> will be mem2reg'd 01472 /// ... 01473 /// %V2 = load i32* %Other 01474 /// ... 01475 /// %V = phi [i32 %V1, i32 %V2] 01476 /// 01477 /// We can do this to a select if its only uses are loads and if the operands 01478 /// to the select can be loaded unconditionally. 01479 /// 01480 /// FIXME: This should be hoisted into a generic utility, likely in 01481 /// Transforms/Util/Local.h 01482 bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *> &Loads) { 01483 // For now, we can only do this promotion if the load is in the same block 01484 // as the PHI, and if there are no stores between the phi and load. 01485 // TODO: Allow recursive phi users. 01486 // TODO: Allow stores. 01487 BasicBlock *BB = PN.getParent(); 01488 unsigned MaxAlign = 0; 01489 for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end(); 01490 UI != UE; ++UI) { 01491 LoadInst *LI = dyn_cast<LoadInst>(*UI); 01492 if (LI == 0 || !LI->isSimple()) return false; 01493 01494 // For now we only allow loads in the same block as the PHI. This is 01495 // a common case that happens when instcombine merges two loads through 01496 // a PHI. 01497 if (LI->getParent() != BB) return false; 01498 01499 // Ensure that there are no instructions between the PHI and the load that 01500 // could store. 01501 for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI) 01502 if (BBI->mayWriteToMemory()) 01503 return false; 01504 01505 MaxAlign = std::max(MaxAlign, LI->getAlignment()); 01506 Loads.push_back(LI); 01507 } 01508 01509 // We can only transform this if it is safe to push the loads into the 01510 // predecessor blocks. The only thing to watch out for is that we can't put 01511 // a possibly trapping load in the predecessor if it is a critical edge. 01512 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { 01513 TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator(); 01514 Value *InVal = PN.getIncomingValue(Idx); 01515 01516 // If the value is produced by the terminator of the predecessor (an 01517 // invoke) or it has side-effects, there is no valid place to put a load 01518 // in the predecessor. 01519 if (TI == InVal || TI->mayHaveSideEffects()) 01520 return false; 01521 01522 // If the predecessor has a single successor, then the edge isn't 01523 // critical. 01524 if (TI->getNumSuccessors() == 1) 01525 continue; 01526 01527 // If this pointer is always safe to load, or if we can prove that there 01528 // is already a load in the block, then we can move the load to the pred 01529 // block. 01530 if (InVal->isDereferenceablePointer() || 01531 isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD)) 01532 continue; 01533 01534 return false; 01535 } 01536 01537 return true; 01538 } 01539 01540 void visitPHINode(PHINode &PN) { 01541 DEBUG(dbgs() << " original: " << PN << "\n"); 01542 01543 SmallVector<LoadInst *, 4> Loads; 01544 if (!isSafePHIToSpeculate(PN, Loads)) 01545 return; 01546 01547 assert(!Loads.empty()); 01548 01549 Type *LoadTy = cast<PointerType>(PN.getType())->getElementType(); 01550 IRBuilderTy PHIBuilder(&PN); 01551 PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), 01552 PN.getName() + ".sroa.speculated"); 01553 01554 // Get the TBAA tag and alignment to use from one of the loads. It doesn't 01555 // matter which one we get and if any differ. 01556 LoadInst *SomeLoad = cast<LoadInst>(Loads.back()); 01557 MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); 01558 unsigned Align = SomeLoad->getAlignment(); 01559 01560 // Rewrite all loads of the PN to use the new PHI. 01561 do { 01562 LoadInst *LI = Loads.pop_back_val(); 01563 LI->replaceAllUsesWith(NewPN); 01564 Pass.DeadInsts.insert(LI); 01565 } while (!Loads.empty()); 01566 01567 // Inject loads into all of the pred blocks. 01568 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { 01569 BasicBlock *Pred = PN.getIncomingBlock(Idx); 01570 TerminatorInst *TI = Pred->getTerminator(); 01571 Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx)); 01572 Value *InVal = PN.getIncomingValue(Idx); 01573 IRBuilderTy PredBuilder(TI); 01574 01575 LoadInst *Load 01576 = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." + 01577 Pred->getName())); 01578 ++NumLoadsSpeculated; 01579 Load->setAlignment(Align); 01580 if (TBAATag) 01581 Load->setMetadata(LLVMContext::MD_tbaa, TBAATag); 01582 NewPN->addIncoming(Load, Pred); 01583 01584 Instruction *Ptr = dyn_cast<Instruction>(InVal); 01585 if (!Ptr) 01586 // No uses to rewrite. 01587 continue; 01588 01589 // Try to lookup and rewrite any partition uses corresponding to this phi 01590 // input. 01591 AllocaPartitioning::iterator PI 01592 = P.findPartitionForPHIOrSelectOperand(InUse); 01593 if (PI == P.end()) 01594 continue; 01595 01596 // Replace the Use in the PartitionUse for this operand with the Use 01597 // inside the load. 01598 AllocaPartitioning::use_iterator UI 01599 = P.findPartitionUseForPHIOrSelectOperand(InUse); 01600 assert(isa<PHINode>(*UI->getUse()->getUser())); 01601 UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex())); 01602 } 01603 DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); 01604 } 01605 01606 /// Select instructions that use an alloca and are subsequently loaded can be 01607 /// rewritten to load both input pointers and then select between the result, 01608 /// allowing the load of the alloca to be promoted. 01609 /// From this: 01610 /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other 01611 /// %V = load i32* %P2 01612 /// to: 01613 /// %V1 = load i32* %Alloca -> will be mem2reg'd 01614 /// %V2 = load i32* %Other 01615 /// %V = select i1 %cond, i32 %V1, i32 %V2 01616 /// 01617 /// We can do this to a select if its only uses are loads and if the operand 01618 /// to the select can be loaded unconditionally. 01619 bool isSafeSelectToSpeculate(SelectInst &SI, 01620 SmallVectorImpl<LoadInst *> &Loads) { 01621 Value *TValue = SI.getTrueValue(); 01622 Value *FValue = SI.getFalseValue(); 01623 bool TDerefable = TValue->isDereferenceablePointer(); 01624 bool FDerefable = FValue->isDereferenceablePointer(); 01625 01626 for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end(); 01627 UI != UE; ++UI) { 01628 LoadInst *LI = dyn_cast<LoadInst>(*UI); 01629 if (LI == 0 || !LI->isSimple()) return false; 01630 01631 // Both operands to the select need to be dereferencable, either 01632 // absolutely (e.g. allocas) or at this point because we can see other 01633 // accesses to it. 01634 if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI, 01635 LI->getAlignment(), &TD)) 01636 return false; 01637 if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI, 01638 LI->getAlignment(), &TD)) 01639 return false; 01640 Loads.push_back(LI); 01641 } 01642 01643 return true; 01644 } 01645 01646 void visitSelectInst(SelectInst &SI) { 01647 DEBUG(dbgs() << " original: " << SI << "\n"); 01648 01649 // If the select isn't safe to speculate, just use simple logic to emit it. 01650 SmallVector<LoadInst *, 4> Loads; 01651 if (!isSafeSelectToSpeculate(SI, Loads)) 01652 return; 01653 01654 IRBuilderTy IRB(&SI); 01655 Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) }; 01656 AllocaPartitioning::iterator PIs[2]; 01657 PartitionUse PUs[2]; 01658 for (unsigned i = 0, e = 2; i != e; ++i) { 01659 PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]); 01660 if (PIs[i] != P.end()) { 01661 // If the pointer is within the partitioning, remove the select from 01662 // its uses. We'll add in the new loads below. 01663 AllocaPartitioning::use_iterator UI 01664 = P.findPartitionUseForPHIOrSelectOperand(Ops[i]); 01665 PUs[i] = *UI; 01666 // Clear out the use here so that the offsets into the use list remain 01667 // stable but this use is ignored when rewriting. 01668 UI->setUse(0); 01669 } 01670 } 01671 01672 Value *TV = SI.getTrueValue(); 01673 Value *FV = SI.getFalseValue(); 01674 // Replace the loads of the select with a select of two loads. 01675 while (!Loads.empty()) { 01676 LoadInst *LI = Loads.pop_back_val(); 01677 01678 IRB.SetInsertPoint(LI); 01679 LoadInst *TL = 01680 IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true"); 01681 LoadInst *FL = 01682 IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false"); 01683 NumLoadsSpeculated += 2; 01684 01685 // Transfer alignment and TBAA info if present. 01686 TL->setAlignment(LI->getAlignment()); 01687 FL->setAlignment(LI->getAlignment()); 01688 if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) { 01689 TL->setMetadata(LLVMContext::MD_tbaa, Tag); 01690 FL->setMetadata(LLVMContext::MD_tbaa, Tag); 01691 } 01692 01693 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL, 01694 LI->getName() + ".sroa.speculated"); 01695 01696 LoadInst *Loads[2] = { TL, FL }; 01697 for (unsigned i = 0, e = 2; i != e; ++i) { 01698 if (PIs[i] != P.end()) { 01699 Use *LoadUse = &Loads[i]->getOperandUse(0); 01700 assert(PUs[i].getUse()->get() == LoadUse->get()); 01701 PUs[i].setUse(LoadUse); 01702 P.use_push_back(PIs[i], PUs[i]); 01703 } 01704 } 01705 01706 DEBUG(dbgs() << " speculated to: " << *V << "\n"); 01707 LI->replaceAllUsesWith(V); 01708 Pass.DeadInsts.insert(LI); 01709 } 01710 } 01711 }; 01712 } 01713 01714 /// \brief Build a GEP out of a base pointer and indices. 01715 /// 01716 /// This will return the BasePtr if that is valid, or build a new GEP 01717 /// instruction using the IRBuilder if GEP-ing is needed. 01718 static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr, 01719 SmallVectorImpl<Value *> &Indices) { 01720 if (Indices.empty()) 01721 return BasePtr; 01722 01723 // A single zero index is a no-op, so check for this and avoid building a GEP 01724 // in that case. 01725 if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero()) 01726 return BasePtr; 01727 01728 return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx"); 01729 } 01730 01731 /// \brief Get a natural GEP off of the BasePtr walking through Ty toward 01732 /// TargetTy without changing the offset of the pointer. 01733 /// 01734 /// This routine assumes we've already established a properly offset GEP with 01735 /// Indices, and arrived at the Ty type. The goal is to continue to GEP with 01736 /// zero-indices down through type layers until we find one the same as 01737 /// TargetTy. If we can't find one with the same type, we at least try to use 01738 /// one with the same size. If none of that works, we just produce the GEP as 01739 /// indicated by Indices to have the correct offset. 01740 static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD, 01741 Value *BasePtr, Type *Ty, Type *TargetTy, 01742 SmallVectorImpl<Value *> &Indices) { 01743 if (Ty == TargetTy) 01744 return buildGEP(IRB, BasePtr, Indices); 01745 01746 // See if we can descend into a struct and locate a field with the correct 01747 // type. 01748 unsigned NumLayers = 0; 01749 Type *ElementTy = Ty; 01750 do { 01751 if (ElementTy->isPointerTy()) 01752 break; 01753 if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) { 01754 ElementTy = SeqTy->getElementType(); 01755 // Note that we use the default address space as this index is over an 01756 // array or a vector, not a pointer. 01757 Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(0), 0))); 01758 } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) { 01759 if (STy->element_begin() == STy->element_end()) 01760 break; // Nothing left to descend into. 01761 ElementTy = *STy->element_begin(); 01762 Indices.push_back(IRB.getInt32(0)); 01763 } else { 01764 break; 01765 } 01766 ++NumLayers; 01767 } while (ElementTy != TargetTy); 01768 if (ElementTy != TargetTy) 01769 Indices.erase(Indices.end() - NumLayers, Indices.end()); 01770 01771 return buildGEP(IRB, BasePtr, Indices); 01772 } 01773 01774 /// \brief Recursively compute indices for a natural GEP. 01775 /// 01776 /// This is the recursive step for getNaturalGEPWithOffset that walks down the 01777 /// element types adding appropriate indices for the GEP. 01778 static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD, 01779 Value *Ptr, Type *Ty, APInt &Offset, 01780 Type *TargetTy, 01781 SmallVectorImpl<Value *> &Indices) { 01782 if (Offset == 0) 01783 return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices); 01784 01785 // We can't recurse through pointer types. 01786 if (Ty->isPointerTy()) 01787 return 0; 01788 01789 // We try to analyze GEPs over vectors here, but note that these GEPs are 01790 // extremely poorly defined currently. The long-term goal is to remove GEPing 01791 // over a vector from the IR completely. 01792 if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) { 01793 unsigned ElementSizeInBits = TD.getTypeSizeInBits(VecTy->getScalarType()); 01794 if (ElementSizeInBits % 8) 01795 return 0; // GEPs over non-multiple of 8 size vector elements are invalid. 01796 APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); 01797 APInt NumSkippedElements = Offset.sdiv(ElementSize); 01798 if (NumSkippedElements.ugt(VecTy->getNumElements())) 01799 return 0; 01800 Offset -= NumSkippedElements * ElementSize; 01801 Indices.push_back(IRB.getInt(NumSkippedElements)); 01802 return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(), 01803 Offset, TargetTy, Indices); 01804 } 01805 01806 if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { 01807 Type *ElementTy = ArrTy->getElementType(); 01808 APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy)); 01809 APInt NumSkippedElements = Offset.sdiv(ElementSize); 01810 if (NumSkippedElements.ugt(ArrTy->getNumElements())) 01811 return 0; 01812 01813 Offset -= NumSkippedElements * ElementSize; 01814 Indices.push_back(IRB.getInt(NumSkippedElements)); 01815 return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, 01816 Indices); 01817 } 01818 01819 StructType *STy = dyn_cast<StructType>(Ty); 01820 if (!STy) 01821 return 0; 01822 01823 const StructLayout *SL = TD.getStructLayout(STy); 01824 uint64_t StructOffset = Offset.getZExtValue(); 01825 if (StructOffset >= SL->getSizeInBytes()) 01826 return 0; 01827 unsigned Index = SL->getElementContainingOffset(StructOffset); 01828 Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index)); 01829 Type *ElementTy = STy->getElementType(Index); 01830 if (Offset.uge(TD.getTypeAllocSize(ElementTy))) 01831 return 0; // The offset points into alignment padding. 01832 01833 Indices.push_back(IRB.getInt32(Index)); 01834 return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, 01835 Indices); 01836 } 01837 01838 /// \brief Get a natural GEP from a base pointer to a particular offset and 01839 /// resulting in a particular type. 01840 /// 01841 /// The goal is to produce a "natural" looking GEP that works with the existing 01842 /// composite types to arrive at the appropriate offset and element type for 01843 /// a pointer. TargetTy is the element type the returned GEP should point-to if 01844 /// possible. We recurse by decreasing Offset, adding the appropriate index to 01845 /// Indices, and setting Ty to the result subtype. 01846 /// 01847 /// If no natural GEP can be constructed, this function returns null. 01848 static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD, 01849 Value *Ptr, APInt Offset, Type *TargetTy, 01850 SmallVectorImpl<Value *> &Indices) { 01851 PointerType *Ty = cast<PointerType>(Ptr->getType()); 01852 01853 // Don't consider any GEPs through an i8* as natural unless the TargetTy is 01854 // an i8. 01855 if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8)) 01856 return 0; 01857 01858 Type *ElementTy = Ty->getElementType(); 01859 if (!ElementTy->isSized()) 01860 return 0; // We can't GEP through an unsized element. 01861 APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy)); 01862 if (ElementSize == 0) 01863 return 0; // Zero-length arrays can't help us build a natural GEP. 01864 APInt NumSkippedElements = Offset.sdiv(ElementSize); 01865 01866 Offset -= NumSkippedElements * ElementSize; 01867 Indices.push_back(IRB.getInt(NumSkippedElements)); 01868 return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, 01869 Indices); 01870 } 01871 01872 /// \brief Compute an adjusted pointer from Ptr by Offset bytes where the 01873 /// resulting pointer has PointerTy. 01874 /// 01875 /// This tries very hard to compute a "natural" GEP which arrives at the offset 01876 /// and produces the pointer type desired. Where it cannot, it will try to use 01877 /// the natural GEP to arrive at the offset and bitcast to the type. Where that 01878 /// fails, it will try to use an existing i8* and GEP to the byte offset and 01879 /// bitcast to the type. 01880 /// 01881 /// The strategy for finding the more natural GEPs is to peel off layers of the 01882 /// pointer, walking back through bit casts and GEPs, searching for a base 01883 /// pointer from which we can compute a natural GEP with the desired 01884 /// properties. The algorithm tries to fold as many constant indices into 01885 /// a single GEP as possible, thus making each GEP more independent of the 01886 /// surrounding code. 01887 static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD, 01888 Value *Ptr, APInt Offset, Type *PointerTy) { 01889 // Even though we don't look through PHI nodes, we could be called on an 01890 // instruction in an unreachable block, which may be on a cycle. 01891 SmallPtrSet<Value *, 4> Visited; 01892 Visited.insert(Ptr); 01893 SmallVector<Value *, 4> Indices; 01894 01895 // We may end up computing an offset pointer that has the wrong type. If we 01896 // never are able to compute one directly that has the correct type, we'll 01897 // fall back to it, so keep it around here. 01898 Value *OffsetPtr = 0; 01899 01900 // Remember any i8 pointer we come across to re-use if we need to do a raw 01901 // byte offset. 01902 Value *Int8Ptr = 0; 01903 APInt Int8PtrOffset(Offset.getBitWidth(), 0); 01904 01905 Type *TargetTy = PointerTy->getPointerElementType(); 01906 01907 do { 01908 // First fold any existing GEPs into the offset. 01909 while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { 01910 APInt GEPOffset(Offset.getBitWidth(), 0); 01911 if (!GEP->accumulateConstantOffset(TD, GEPOffset)) 01912 break; 01913 Offset += GEPOffset; 01914 Ptr = GEP->getPointerOperand(); 01915 if (!Visited.insert(Ptr)) 01916 break; 01917 } 01918 01919 // See if we can perform a natural GEP here. 01920 Indices.clear(); 01921 if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy, 01922 Indices)) { 01923 if (P->getType() == PointerTy) { 01924 // Zap any offset pointer that we ended up computing in previous rounds. 01925 if (OffsetPtr && OffsetPtr->use_empty()) 01926 if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) 01927 I->eraseFromParent(); 01928 return P; 01929 } 01930 if (!OffsetPtr) { 01931 OffsetPtr = P; 01932 } 01933 } 01934 01935 // Stash this pointer if we've found an i8*. 01936 if (Ptr->getType()->isIntegerTy(8)) { 01937 Int8Ptr = Ptr; 01938 Int8PtrOffset = Offset; 01939 } 01940 01941 // Peel off a layer of the pointer and update the offset appropriately. 01942 if (Operator::getOpcode(Ptr) == Instruction::BitCast) { 01943 Ptr = cast<Operator>(Ptr)->getOperand(0); 01944 } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) { 01945 if (GA->mayBeOverridden()) 01946 break; 01947 Ptr = GA->getAliasee(); 01948 } else { 01949 break; 01950 } 01951 assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!"); 01952 } while (Visited.insert(Ptr)); 01953 01954 if (!OffsetPtr) { 01955 if (!Int8Ptr) { 01956 Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(), 01957 "raw_cast"); 01958 Int8PtrOffset = Offset; 01959 } 01960 01961 OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr : 01962 IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset), 01963 "raw_idx"); 01964 } 01965 Ptr = OffsetPtr; 01966 01967 // On the off chance we were targeting i8*, guard the bitcast here. 01968 if (Ptr->getType() != PointerTy) 01969 Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast"); 01970 01971 return Ptr; 01972 } 01973 01974 /// \brief Test whether we can convert a value from the old to the new type. 01975 /// 01976 /// This predicate should be used to guard calls to convertValue in order to 01977 /// ensure that we only try to convert viable values. The strategy is that we 01978 /// will peel off single element struct and array wrappings to get to an 01979 /// underlying value, and convert that value. 01980 static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { 01981 if (OldTy == NewTy) 01982 return true; 01983 if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy)) 01984 if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy)) 01985 if (NewITy->getBitWidth() >= OldITy->getBitWidth()) 01986 return true; 01987 if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy)) 01988 return false; 01989 if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) 01990 return false; 01991 01992 if (NewTy->isPointerTy() || OldTy->isPointerTy()) { 01993 if (NewTy->isPointerTy() && OldTy->isPointerTy()) 01994 return true; 01995 if (NewTy->isIntegerTy() || OldTy->isIntegerTy()) 01996 return true; 01997 return false; 01998 } 01999 02000 return true; 02001 } 02002 02003 /// \brief Generic routine to convert an SSA value to a value of a different 02004 /// type. 02005 /// 02006 /// This will try various different casting techniques, such as bitcasts, 02007 /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test 02008 /// two types for viability with this routine. 02009 static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, 02010 Type *Ty) { 02011 assert(canConvertValue(DL, V->getType(), Ty) && 02012 "Value not convertable to type"); 02013 if (V->getType() == Ty) 02014 return V; 02015 if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType())) 02016 if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty)) 02017 if (NewITy->getBitWidth() > OldITy->getBitWidth()) 02018 return IRB.CreateZExt(V, NewITy); 02019 if (V->getType()->isIntegerTy() && Ty->isPointerTy()) 02020 return IRB.CreateIntToPtr(V, Ty); 02021 if (V->getType()->isPointerTy() && Ty->isIntegerTy()) 02022 return IRB.CreatePtrToInt(V, Ty); 02023 02024 return IRB.CreateBitCast(V, Ty); 02025 } 02026 02027 /// \brief Test whether the given alloca partition can be promoted to a vector. 02028 /// 02029 /// This is a quick test to check whether we can rewrite a particular alloca 02030 /// partition (and its newly formed alloca) into a vector alloca with only 02031 /// whole-vector loads and stores such that it could be promoted to a vector 02032 /// SSA value. We only can ensure this for a limited set of operations, and we 02033 /// don't want to do the rewrites unless we are confident that the result will 02034 /// be promotable, so we have an early test here. 02035 static bool isVectorPromotionViable(const DataLayout &TD, 02036 Type *AllocaTy, 02037 AllocaPartitioning &P, 02038 uint64_t PartitionBeginOffset, 02039 uint64_t PartitionEndOffset, 02040 AllocaPartitioning::const_use_iterator I, 02041 AllocaPartitioning::const_use_iterator E) { 02042 VectorType *Ty = dyn_cast<VectorType>(AllocaTy); 02043 if (!Ty) 02044 return false; 02045 02046 uint64_t ElementSize = TD.getTypeSizeInBits(Ty->getScalarType()); 02047 02048 // While the definition of LLVM vectors is bitpacked, we don't support sizes 02049 // that aren't byte sized. 02050 if (ElementSize % 8) 02051 return false; 02052 assert((TD.getTypeSizeInBits(Ty) % 8) == 0 && 02053 "vector size not a multiple of element size?"); 02054 ElementSize /= 8; 02055 02056 for (; I != E; ++I) { 02057 Use *U = I->getUse(); 02058 if (!U) 02059 continue; // Skip dead use. 02060 02061 uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset; 02062 uint64_t BeginIndex = BeginOffset / ElementSize; 02063 if (BeginIndex * ElementSize != BeginOffset || 02064 BeginIndex >= Ty->getNumElements()) 02065 return false; 02066 uint64_t EndOffset = I->EndOffset - PartitionBeginOffset; 02067 uint64_t EndIndex = EndOffset / ElementSize; 02068 if (EndIndex * ElementSize != EndOffset || 02069 EndIndex > Ty->getNumElements()) 02070 return false; 02071 02072 assert(EndIndex > BeginIndex && "Empty vector!"); 02073 uint64_t NumElements = EndIndex - BeginIndex; 02074 Type *PartitionTy 02075 = (NumElements == 1) ? Ty->getElementType() 02076 : VectorType::get(Ty->getElementType(), NumElements); 02077 02078 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { 02079 if (MI->isVolatile()) 02080 return false; 02081 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { 02082 const AllocaPartitioning::MemTransferOffsets &MTO 02083 = P.getMemTransferOffsets(*MTI); 02084 if (!MTO.IsSplittable) 02085 return false; 02086 } 02087 } else if (U->get()->getType()->getPointerElementType()->isStructTy()) { 02088 // Disable vector promotion when there are loads or stores of an FCA. 02089 return false; 02090 } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { 02091 if (LI->isVolatile()) 02092 return false; 02093 if (!canConvertValue(TD, PartitionTy, LI->getType())) 02094 return false; 02095 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { 02096 if (SI->isVolatile()) 02097 return false; 02098 if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy)) 02099 return false; 02100 } else { 02101 return false; 02102 } 02103 } 02104 return true; 02105 } 02106 02107 /// \brief Test whether the given alloca partition's integer operations can be 02108 /// widened to promotable ones. 02109 /// 02110 /// This is a quick test to check whether we can rewrite the integer loads and 02111 /// stores to a particular alloca into wider loads and stores and be able to 02112 /// promote the resulting alloca. 02113 static bool isIntegerWideningViable(const DataLayout &TD, 02114 Type *AllocaTy, 02115 uint64_t AllocBeginOffset, 02116 AllocaPartitioning &P, 02117 AllocaPartitioning::const_use_iterator I, 02118 AllocaPartitioning::const_use_iterator E) { 02119 uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy); 02120 // Don't create integer types larger than the maximum bitwidth. 02121 if (SizeInBits > IntegerType::MAX_INT_BITS) 02122 return false; 02123 02124 // Don't try to handle allocas with bit-padding. 02125 if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy)) 02126 return false; 02127 02128 // We need to ensure that an integer type with the appropriate bitwidth can 02129 // be converted to the alloca type, whatever that is. We don't want to force 02130 // the alloca itself to have an integer type if there is a more suitable one. 02131 Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits); 02132 if (!canConvertValue(TD, AllocaTy, IntTy) || 02133 !canConvertValue(TD, IntTy, AllocaTy)) 02134 return false; 02135 02136 uint64_t Size = TD.getTypeStoreSize(AllocaTy); 02137 02138 // Check the uses to ensure the uses are (likely) promotable integer uses. 02139 // Also ensure that the alloca has a covering load or store. We don't want 02140 // to widen the integer operations only to fail to promote due to some other 02141 // unsplittable entry (which we may make splittable later). 02142 bool WholeAllocaOp = false; 02143 for (; I != E; ++I) { 02144 Use *U = I->getUse(); 02145 if (!U) 02146 continue; // Skip dead use. 02147 02148 uint64_t RelBegin = I->BeginOffset - AllocBeginOffset; 02149 uint64_t RelEnd = I->EndOffset - AllocBeginOffset; 02150 02151 // We can't reasonably handle cases where the load or store extends past 02152 // the end of the aloca's type and into its padding. 02153 if (RelEnd > Size) 02154 return false; 02155 02156 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { 02157 if (LI->isVolatile()) 02158 return false; 02159 if (RelBegin == 0 && RelEnd == Size) 02160 WholeAllocaOp = true; 02161 if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) { 02162 if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy)) 02163 return false; 02164 continue; 02165 } 02166 // Non-integer loads need to be convertible from the alloca type so that 02167 // they are promotable. 02168 if (RelBegin != 0 || RelEnd != Size || 02169 !canConvertValue(TD, AllocaTy, LI->getType())) 02170 return false; 02171 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { 02172 Type *ValueTy = SI->getValueOperand()->getType(); 02173 if (SI->isVolatile()) 02174 return false; 02175 if (RelBegin == 0 && RelEnd == Size) 02176 WholeAllocaOp = true; 02177 if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) { 02178 if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy)) 02179 return false; 02180 continue; 02181 } 02182 // Non-integer stores need to be convertible to the alloca type so that 02183 // they are promotable. 02184 if (RelBegin != 0 || RelEnd != Size || 02185 !canConvertValue(TD, ValueTy, AllocaTy)) 02186 return false; 02187 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { 02188 if (MI->isVolatile() || !isa<Constant>(MI->getLength())) 02189 return false; 02190 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { 02191 const AllocaPartitioning::MemTransferOffsets &MTO 02192 = P.getMemTransferOffsets(*MTI); 02193 if (!MTO.IsSplittable) 02194 return false; 02195 } 02196 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { 02197 if (II->getIntrinsicID() != Intrinsic::lifetime_start && 02198 II->getIntrinsicID() != Intrinsic::lifetime_end) 02199 return false; 02200 } else { 02201 return false; 02202 } 02203 } 02204 return WholeAllocaOp; 02205 } 02206 02207 static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, 02208 IntegerType *Ty, uint64_t Offset, 02209 const Twine &Name) { 02210 DEBUG(dbgs() << " start: " << *V << "\n"); 02211 IntegerType *IntTy = cast<IntegerType>(V->getType()); 02212 assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && 02213 "Element extends past full value"); 02214 uint64_t ShAmt = 8*Offset; 02215 if (DL.isBigEndian()) 02216 ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); 02217 if (ShAmt) { 02218 V = IRB.CreateLShr(V, ShAmt, Name + ".shift"); 02219 DEBUG(dbgs() << " shifted: " << *V << "\n"); 02220 } 02221 assert(Ty->getBitWidth() <= IntTy->getBitWidth() && 02222 "Cannot extract to a larger integer!"); 02223 if (Ty != IntTy) { 02224 V = IRB.CreateTrunc(V, Ty, Name + ".trunc"); 02225 DEBUG(dbgs() << " trunced: " << *V << "\n"); 02226 } 02227 return V; 02228 } 02229 02230 static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, 02231 Value *V, uint64_t Offset, const Twine &Name) { 02232 IntegerType *IntTy = cast<IntegerType>(Old->getType()); 02233 IntegerType *Ty = cast<IntegerType>(V->getType()); 02234 assert(Ty->getBitWidth() <= IntTy->getBitWidth() && 02235 "Cannot insert a larger integer!"); 02236 DEBUG(dbgs() << " start: " << *V << "\n"); 02237 if (Ty != IntTy) { 02238 V = IRB.CreateZExt(V, IntTy, Name + ".ext"); 02239 DEBUG(dbgs() << " extended: " << *V << "\n"); 02240 } 02241 assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && 02242 "Element store outside of alloca store"); 02243 uint64_t ShAmt = 8*Offset; 02244 if (DL.isBigEndian()) 02245 ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); 02246 if (ShAmt) { 02247 V = IRB.CreateShl(V, ShAmt, Name + ".shift"); 02248 DEBUG(dbgs() << " shifted: " << *V << "\n"); 02249 } 02250 02251 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) { 02252 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt); 02253 Old = IRB.CreateAnd(Old, Mask, Name + ".mask"); 02254 DEBUG(dbgs() << " masked: " << *Old << "\n"); 02255 V = IRB.CreateOr(Old, V, Name + ".insert"); 02256 DEBUG(dbgs() << " inserted: " << *V << "\n"); 02257 } 02258 return V; 02259 } 02260 02261 static Value *extractVector(IRBuilderTy &IRB, Value *V, 02262 unsigned BeginIndex, unsigned EndIndex, 02263 const Twine &Name) { 02264 VectorType *VecTy = cast<VectorType>(V->getType()); 02265 unsigned NumElements = EndIndex - BeginIndex; 02266 assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); 02267 02268 if (NumElements == VecTy->getNumElements()) 02269 return V; 02270 02271 if (NumElements == 1) { 02272 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), 02273 Name + ".extract"); 02274 DEBUG(dbgs() << " extract: " << *V << "\n"); 02275 return V; 02276 } 02277 02278 SmallVector<Constant*, 8> Mask; 02279 Mask.reserve(NumElements); 02280 for (unsigned i = BeginIndex; i != EndIndex; ++i) 02281 Mask.push_back(IRB.getInt32(i)); 02282 V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), 02283 ConstantVector::get(Mask), 02284 Name + ".extract"); 02285 DEBUG(dbgs() << " shuffle: " << *V << "\n"); 02286 return V; 02287 } 02288 02289 static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, 02290 unsigned BeginIndex, const Twine &Name) { 02291 VectorType *VecTy = cast<VectorType>(Old->getType()); 02292 assert(VecTy && "Can only insert a vector into a vector"); 02293 02294 VectorType *Ty = dyn_cast<VectorType>(V->getType()); 02295 if (!Ty) { 02296 // Single element to insert. 02297 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex), 02298 Name + ".insert"); 02299 DEBUG(dbgs() << " insert: " << *V << "\n"); 02300 return V; 02301 } 02302 02303 assert(Ty->getNumElements() <= VecTy->getNumElements() && 02304 "Too many elements!"); 02305 if (Ty->getNumElements() == VecTy->getNumElements()) { 02306 assert(V->getType() == VecTy && "Vector type mismatch"); 02307 return V; 02308 } 02309 unsigned EndIndex = BeginIndex + Ty->getNumElements(); 02310 02311 // When inserting a smaller vector into the larger to store, we first 02312 // use a shuffle vector to widen it with undef elements, and then 02313 // a second shuffle vector to select between the loaded vector and the 02314 // incoming vector. 02315 SmallVector<Constant*, 8> Mask; 02316 Mask.reserve(VecTy->getNumElements()); 02317 for (unsigned i = 0; i != VecTy->getNumElements(); ++i) 02318 if (i >= BeginIndex && i < EndIndex) 02319 Mask.push_back(IRB.getInt32(i - BeginIndex)); 02320 else 02321 Mask.push_back(UndefValue::get(IRB.getInt32Ty())); 02322 V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), 02323 ConstantVector::get(Mask), 02324 Name + ".expand"); 02325 DEBUG(dbgs() << " shuffle: " << *V << "\n"); 02326 02327 Mask.clear(); 02328 for (unsigned i = 0; i != VecTy->getNumElements(); ++i) 02329 Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); 02330 02331 V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend"); 02332 02333 DEBUG(dbgs() << " blend: " << *V << "\n"); 02334 return V; 02335 } 02336 02337 namespace { 02338 /// \brief Visitor to rewrite instructions using a partition of an alloca to 02339 /// use a new alloca. 02340 /// 02341 /// Also implements the rewriting to vector-based accesses when the partition 02342 /// passes the isVectorPromotionViable predicate. Most of the rewriting logic 02343 /// lives here. 02344 class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter, 02345 bool> { 02346 // Befriend the base class so it can delegate to private visit methods. 02347 friend class llvm::InstVisitor<AllocaPartitionRewriter, bool>; 02348 02349 const DataLayout &TD; 02350 AllocaPartitioning &P; 02351 SROA &Pass; 02352 AllocaInst &OldAI, &NewAI; 02353 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset; 02354 Type *NewAllocaTy; 02355 02356 // If we are rewriting an alloca partition which can be written as pure 02357 // vector operations, we stash extra information here. When VecTy is 02358 // non-null, we have some strict guarantees about the rewritten alloca: 02359 // - The new alloca is exactly the size of the vector type here. 02360 // - The accesses all either map to the entire vector or to a single 02361 // element. 02362 // - The set of accessing instructions is only one of those handled above 02363 // in isVectorPromotionViable. Generally these are the same access kinds 02364 // which are promotable via mem2reg. 02365 VectorType *VecTy; 02366 Type *ElementTy; 02367 uint64_t ElementSize; 02368 02369 // This is a convenience and flag variable that will be null unless the new 02370 // alloca's integer operations should be widened to this integer type due to 02371 // passing isIntegerWideningViable above. If it is non-null, the desired 02372 // integer type will be stored here for easy access during rewriting. 02373 IntegerType *IntTy; 02374 02375 // The offset of the partition user currently being rewritten. 02376 uint64_t BeginOffset, EndOffset; 02377 bool IsSplit; 02378 Use *OldUse; 02379 Instruction *OldPtr; 02380 02381 // Utility IR builder, whose name prefix is setup for each visited use, and 02382 // the insertion point is set to point to the user. 02383 IRBuilderTy IRB; 02384 02385 public: 02386 AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P, 02387 AllocaPartitioning::iterator PI, 02388 SROA &Pass, AllocaInst &OldAI, AllocaInst &NewAI, 02389 uint64_t NewBeginOffset, uint64_t NewEndOffset) 02390 : TD(TD), P(P), Pass(Pass), 02391 OldAI(OldAI), NewAI(NewAI), 02392 NewAllocaBeginOffset(NewBeginOffset), 02393 NewAllocaEndOffset(NewEndOffset), 02394 NewAllocaTy(NewAI.getAllocatedType()), 02395 VecTy(), ElementTy(), ElementSize(), IntTy(), 02396 BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(), 02397 IRB(NewAI.getContext(), ConstantFolder()) { 02398 } 02399 02400 /// \brief Visit the users of the alloca partition and rewrite them. 02401 bool visitUsers(AllocaPartitioning::const_use_iterator I, 02402 AllocaPartitioning::const_use_iterator E) { 02403 if (isVectorPromotionViable(TD, NewAI.getAllocatedType(), P, 02404 NewAllocaBeginOffset, NewAllocaEndOffset, 02405 I, E)) { 02406 ++NumVectorized; 02407 VecTy = cast<VectorType>(NewAI.getAllocatedType()); 02408 ElementTy = VecTy->getElementType(); 02409 assert((TD.getTypeSizeInBits(VecTy->getScalarType()) % 8) == 0 && 02410 "Only multiple-of-8 sized vector elements are viable"); 02411 ElementSize = TD.getTypeSizeInBits(VecTy->getScalarType()) / 8; 02412 } else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(), 02413 NewAllocaBeginOffset, P, I, E)) { 02414 IntTy = Type::getIntNTy(NewAI.getContext(), 02415 TD.getTypeSizeInBits(NewAI.getAllocatedType())); 02416 } 02417 bool CanSROA = true; 02418 for (; I != E; ++I) { 02419 if (!I->getUse()) 02420 continue; // Skip dead uses. 02421 BeginOffset = I->BeginOffset; 02422 EndOffset = I->EndOffset; 02423 IsSplit = I->isSplit(); 02424 OldUse = I->getUse(); 02425 OldPtr = cast<Instruction>(OldUse->get()); 02426 02427 Instruction *OldUserI = cast<Instruction>(OldUse->getUser()); 02428 IRB.SetInsertPoint(OldUserI); 02429 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc()); 02430 IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + 02431 "."); 02432 02433 CanSROA &= visit(cast<Instruction>(OldUse->getUser())); 02434 } 02435 if (VecTy) { 02436 assert(CanSROA); 02437 VecTy = 0; 02438 ElementTy = 0; 02439 ElementSize = 0; 02440 } 02441 if (IntTy) { 02442 assert(CanSROA); 02443 IntTy = 0; 02444 } 02445 return CanSROA; 02446 } 02447 02448 private: 02449 // Every instruction which can end up as a user must have a rewrite rule. 02450 bool visitInstruction(Instruction &I) { 02451 DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n"); 02452 llvm_unreachable("No rewrite rule for this instruction!"); 02453 } 02454 02455 Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) { 02456 assert(BeginOffset >= NewAllocaBeginOffset); 02457 APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset); 02458 return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy); 02459 } 02460 02461 /// \brief Compute suitable alignment to access an offset into the new alloca. 02462 unsigned getOffsetAlign(uint64_t Offset) { 02463 unsigned NewAIAlign = NewAI.getAlignment(); 02464 if (!NewAIAlign) 02465 NewAIAlign = TD.getABITypeAlignment(NewAI.getAllocatedType()); 02466 return MinAlign(NewAIAlign, Offset); 02467 } 02468 02469 /// \brief Compute suitable alignment to access this partition of the new 02470 /// alloca. 02471 unsigned getPartitionAlign() { 02472 return getOffsetAlign(BeginOffset - NewAllocaBeginOffset); 02473 } 02474 02475 /// \brief Compute suitable alignment to access a type at an offset of the 02476 /// new alloca. 02477 /// 02478 /// \returns zero if the type's ABI alignment is a suitable alignment, 02479 /// otherwise returns the maximal suitable alignment. 02480 unsigned getOffsetTypeAlign(Type *Ty, uint64_t Offset) { 02481 unsigned Align = getOffsetAlign(Offset); 02482 return Align == TD.getABITypeAlignment(Ty) ? 0 : Align; 02483 } 02484 02485 /// \brief Compute suitable alignment to access a type at the beginning of 02486 /// this partition of the new alloca. 02487 /// 02488 /// See \c getOffsetTypeAlign for details; this routine delegates to it. 02489 unsigned getPartitionTypeAlign(Type *Ty) { 02490 return getOffsetTypeAlign(Ty, BeginOffset - NewAllocaBeginOffset); 02491 } 02492 02493 unsigned getIndex(uint64_t Offset) { 02494 assert(VecTy && "Can only call getIndex when rewriting a vector"); 02495 uint64_t RelOffset = Offset - NewAllocaBeginOffset; 02496 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds"); 02497 uint32_t Index = RelOffset / ElementSize; 02498 assert(Index * ElementSize == RelOffset); 02499 return Index; 02500 } 02501 02502 void deleteIfTriviallyDead(Value *V) { 02503 Instruction *I = cast<Instruction>(V); 02504 if (isInstructionTriviallyDead(I)) 02505 Pass.DeadInsts.insert(I); 02506 } 02507 02508 Value *rewriteVectorizedLoadInst() { 02509 unsigned BeginIndex = getIndex(BeginOffset); 02510 unsigned EndIndex = getIndex(EndOffset); 02511 assert(EndIndex > BeginIndex && "Empty vector!"); 02512 02513 Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 02514 "load"); 02515 return extractVector(IRB, V, BeginIndex, EndIndex, "vec"); 02516 } 02517 02518 Value *rewriteIntegerLoad(LoadInst &LI) { 02519 assert(IntTy && "We cannot insert an integer to the alloca"); 02520 assert(!LI.isVolatile()); 02521 Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 02522 "load"); 02523 V = convertValue(TD, IRB, V, IntTy); 02524 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); 02525 uint64_t Offset = BeginOffset - NewAllocaBeginOffset; 02526 if (Offset > 0 || EndOffset < NewAllocaEndOffset) 02527 V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset, 02528 "extract"); 02529 return V; 02530 } 02531 02532 bool visitLoadInst(LoadInst &LI) { 02533 DEBUG(dbgs() << " original: " << LI << "\n"); 02534 Value *OldOp = LI.getOperand(0); 02535 assert(OldOp == OldPtr); 02536 02537 uint64_t Size = EndOffset - BeginOffset; 02538 02539 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8) 02540 : LI.getType(); 02541 bool IsPtrAdjusted = false; 02542 Value *V; 02543 if (VecTy) { 02544 V = rewriteVectorizedLoadInst(); 02545 } else if (IntTy && LI.getType()->isIntegerTy()) { 02546 V = rewriteIntegerLoad(LI); 02547 } else if (BeginOffset == NewAllocaBeginOffset && 02548 canConvertValue(TD, NewAllocaTy, LI.getType())) { 02549 V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 02550 LI.isVolatile(), "load"); 02551 } else { 02552 Type *LTy = TargetTy->getPointerTo(); 02553 V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy), 02554 getPartitionTypeAlign(TargetTy), 02555 LI.isVolatile(), "load"); 02556 IsPtrAdjusted = true; 02557 } 02558 V = convertValue(TD, IRB, V, TargetTy); 02559 02560 if (IsSplit) { 02561 assert(!LI.isVolatile()); 02562 assert(LI.getType()->isIntegerTy() && 02563 "Only integer type loads and stores are split"); 02564 assert(Size < TD.getTypeStoreSize(LI.getType()) && 02565 "Split load isn't smaller than original load"); 02566 assert(LI.getType()->getIntegerBitWidth() == 02567 TD.getTypeStoreSizeInBits(LI.getType()) && 02568 "Non-byte-multiple bit width"); 02569 // Move the insertion point just past the load so that we can refer to it. 02570 IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI))); 02571 // Create a placeholder value with the same type as LI to use as the 02572 // basis for the new value. This allows us to replace the uses of LI with 02573 // the computed value, and then replace the placeholder with LI, leaving 02574 // LI only used for this computation. 02575 Value *Placeholder 02576 = new LoadInst(UndefValue::get(LI.getType()->getPointerTo())); 02577 V = insertInteger(TD, IRB, Placeholder, V, BeginOffset, 02578 "insert"); 02579 LI.replaceAllUsesWith(V); 02580 Placeholder->replaceAllUsesWith(&LI); 02581 delete Placeholder; 02582 } else { 02583 LI.replaceAllUsesWith(V); 02584 } 02585 02586 Pass.DeadInsts.insert(&LI); 02587 deleteIfTriviallyDead(OldOp); 02588 DEBUG(dbgs() << " to: " << *V << "\n"); 02589 return !LI.isVolatile() && !IsPtrAdjusted; 02590 } 02591 02592 bool rewriteVectorizedStoreInst(Value *V, 02593 StoreInst &SI, Value *OldOp) { 02594 unsigned BeginIndex = getIndex(BeginOffset); 02595 unsigned EndIndex = getIndex(EndOffset); 02596 assert(EndIndex > BeginIndex && "Empty vector!"); 02597 unsigned NumElements = EndIndex - BeginIndex; 02598 assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); 02599 Type *PartitionTy 02600 = (NumElements == 1) ? ElementTy 02601 : VectorType::get(ElementTy, NumElements); 02602 if (V->getType() != PartitionTy) 02603 V = convertValue(TD, IRB, V, PartitionTy); 02604 02605 // Mix in the existing elements. 02606 Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 02607 "load"); 02608 V = insertVector(IRB, Old, V, BeginIndex, "vec"); 02609 02610 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); 02611 Pass.DeadInsts.insert(&SI); 02612 02613 (void)Store; 02614 DEBUG(dbgs() << " to: " << *Store << "\n"); 02615 return true; 02616 } 02617 02618 bool rewriteIntegerStore(Value *V, StoreInst &SI) { 02619 assert(IntTy && "We cannot extract an integer from the alloca"); 02620 assert(!SI.isVolatile()); 02621 if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { 02622 Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 02623 "oldload"); 02624 Old = convertValue(TD, IRB, Old, IntTy); 02625 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); 02626 uint64_t Offset = BeginOffset - NewAllocaBeginOffset; 02627 V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset, 02628 "insert"); 02629 } 02630 V = convertValue(TD, IRB, V, NewAllocaTy); 02631 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); 02632 Pass.DeadInsts.insert(&SI); 02633 (void)Store; 02634 DEBUG(dbgs() << " to: " << *Store << "\n"); 02635 return true; 02636 } 02637 02638 bool visitStoreInst(StoreInst &SI) { 02639 DEBUG(dbgs() << " original: " << SI << "\n"); 02640 Value *OldOp = SI.getOperand(1); 02641 assert(OldOp == OldPtr); 02642 02643 Value *V = SI.getValueOperand(); 02644 02645 // Strip all inbounds GEPs and pointer casts to try to dig out any root 02646 // alloca that should be re-examined after promoting this alloca. 02647 if (V->getType()->isPointerTy()) 02648 if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) 02649 Pass.PostPromotionWorklist.insert(AI); 02650 02651 uint64_t Size = EndOffset - BeginOffset; 02652 if (Size < TD.getTypeStoreSize(V->getType())) { 02653 assert(!SI.isVolatile()); 02654 assert(IsSplit && "A seemingly split store isn't splittable"); 02655 assert(V->getType()->isIntegerTy() && 02656 "Only integer type loads and stores are split"); 02657 assert(V->getType()->getIntegerBitWidth() == 02658 TD.getTypeStoreSizeInBits(V->getType()) && 02659 "Non-byte-multiple bit width"); 02660 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); 02661 V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, 02662 "extract"); 02663 } 02664 02665 if (VecTy) 02666 return rewriteVectorizedStoreInst(V, SI, OldOp); 02667 if (IntTy && V->getType()->isIntegerTy()) 02668 return rewriteIntegerStore(V, SI); 02669 02670 StoreInst *NewSI; 02671 if (BeginOffset == NewAllocaBeginOffset && 02672 EndOffset == NewAllocaEndOffset && 02673 canConvertValue(TD, V->getType(), NewAllocaTy)) { 02674 V = convertValue(TD, IRB, V, NewAllocaTy); 02675 NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), 02676 SI.isVolatile()); 02677 } else { 02678 Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo()); 02679 NewSI = IRB.CreateAlignedStore(V, NewPtr, 02680 getPartitionTypeAlign(V->getType()), 02681 SI.isVolatile()); 02682 } 02683 (void)NewSI; 02684 Pass.DeadInsts.insert(&SI); 02685 deleteIfTriviallyDead(OldOp); 02686 02687 DEBUG(dbgs() << " to: " << *NewSI << "\n"); 02688 return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); 02689 } 02690 02691 /// \brief Compute an integer value from splatting an i8 across the given 02692 /// number of bytes. 02693 /// 02694 /// Note that this routine assumes an i8 is a byte. If that isn't true, don't 02695 /// call this routine. 02696 /// FIXME: Heed the advice above. 02697 /// 02698 /// \param V The i8 value to splat. 02699 /// \param Size The number of bytes in the output (assuming i8 is one byte) 02700 Value *getIntegerSplat(Value *V, unsigned Size) { 02701 assert(Size > 0 && "Expected a positive number of bytes."); 02702 IntegerType *VTy = cast<IntegerType>(V->getType()); 02703 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte"); 02704 if (Size == 1) 02705 return V; 02706 02707 Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); 02708 V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"), 02709 ConstantExpr::getUDiv( 02710 Constant::getAllOnesValue(SplatIntTy), 02711 ConstantExpr::getZExt( 02712 Constant::getAllOnesValue(V->getType()), 02713 SplatIntTy)), 02714 "isplat"); 02715 return V; 02716 } 02717 02718 /// \brief Compute a vector splat for a given element value. 02719 Value *getVectorSplat(Value *V, unsigned NumElements) { 02720 V = IRB.CreateVectorSplat(NumElements, V, "vsplat"); 02721 DEBUG(dbgs() << " splat: " << *V << "\n"); 02722 return V; 02723 } 02724 02725 bool visitMemSetInst(MemSetInst &II) { 02726 DEBUG(dbgs() << " original: " << II << "\n"); 02727 assert(II.getRawDest() == OldPtr); 02728 02729 // If the memset has a variable size, it cannot be split, just adjust the 02730 // pointer to the new alloca. 02731 if (!isa<Constant>(II.getLength())) { 02732 II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType())); 02733 Type *CstTy = II.getAlignmentCst()->getType(); 02734 II.setAlignment(ConstantInt::get(CstTy, getPartitionAlign())); 02735 02736 deleteIfTriviallyDead(OldPtr); 02737 return false; 02738 } 02739 02740 // Record this instruction for deletion. 02741 Pass.DeadInsts.insert(&II); 02742 02743 Type *AllocaTy = NewAI.getAllocatedType(); 02744 Type *ScalarTy = AllocaTy->getScalarType(); 02745 02746 // If this doesn't map cleanly onto the alloca type, and that type isn't 02747 // a single value type, just emit a memset. 02748 if (!VecTy && !IntTy && 02749 (BeginOffset != NewAllocaBeginOffset || 02750 EndOffset != NewAllocaEndOffset || 02751 !AllocaTy->isSingleValueType() || 02752 !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)) || 02753 TD.getTypeSizeInBits(ScalarTy)%8 != 0)) { 02754 Type *SizeTy = II.getLength()->getType(); 02755 Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); 02756 CallInst *New 02757 = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB, 02758 II.getRawDest()->getType()), 02759 II.getValue(), Size, getPartitionAlign(), 02760 II.isVolatile()); 02761 (void)New; 02762 DEBUG(dbgs() << " to: " << *New << "\n"); 02763 return false; 02764 } 02765 02766 // If we can represent this as a simple value, we have to build the actual 02767 // value to store, which requires expanding the byte present in memset to 02768 // a sensible representation for the alloca type. This is essentially 02769 // splatting the byte to a sufficiently wide integer, splatting it across 02770 // any desired vector width, and bitcasting to the final type. 02771 Value *V; 02772 02773 if (VecTy) { 02774 // If this is a memset of a vectorized alloca, insert it. 02775 assert(ElementTy == ScalarTy); 02776 02777 unsigned BeginIndex = getIndex(BeginOffset); 02778 unsigned EndIndex = getIndex(EndOffset); 02779 assert(EndIndex > BeginIndex && "Empty vector!"); 02780 unsigned NumElements = EndIndex - BeginIndex; 02781 assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); 02782 02783 Value *Splat = 02784 getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8); 02785 Splat = convertValue(TD, IRB, Splat, ElementTy); 02786 if (NumElements > 1) 02787 Splat = getVectorSplat(Splat, NumElements); 02788 02789 Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 02790 "oldload"); 02791 V = insertVector(IRB, Old, Splat, BeginIndex, "vec"); 02792 } else if (IntTy) { 02793 // If this is a memset on an alloca where we can widen stores, insert the 02794 // set integer. 02795 assert(!II.isVolatile()); 02796 02797 uint64_t Size = EndOffset - BeginOffset; 02798 V = getIntegerSplat(II.getValue(), Size); 02799 02800 if (IntTy && (BeginOffset != NewAllocaBeginOffset || 02801 EndOffset != NewAllocaBeginOffset)) { 02802 Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 02803 "oldload"); 02804 Old = convertValue(TD, IRB, Old, IntTy); 02805 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); 02806 uint64_t Offset = BeginOffset - NewAllocaBeginOffset; 02807 V = insertInteger(TD, IRB, Old, V, Offset, "insert"); 02808 } else { 02809 assert(V->getType() == IntTy && 02810 "Wrong type for an alloca wide integer!"); 02811 } 02812 V = convertValue(TD, IRB, V, AllocaTy); 02813 } else { 02814 // Established these invariants above. 02815 assert(BeginOffset == NewAllocaBeginOffset); 02816 assert(EndOffset == NewAllocaEndOffset); 02817 02818 V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8); 02819 if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy)) 02820 V = getVectorSplat(V, AllocaVecTy->getNumElements()); 02821 02822 V = convertValue(TD, IRB, V, AllocaTy); 02823 } 02824 02825 Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), 02826 II.isVolatile()); 02827 (void)New; 02828 DEBUG(dbgs() << " to: " << *New << "\n"); 02829 return !II.isVolatile(); 02830 } 02831 02832 bool visitMemTransferInst(MemTransferInst &II) { 02833 // Rewriting of memory transfer instructions can be a bit tricky. We break 02834 // them into two categories: split intrinsics and unsplit intrinsics. 02835 02836 DEBUG(dbgs() << " original: " << II << "\n"); 02837 02838 assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr); 02839 bool IsDest = II.getRawDest() == OldPtr; 02840 02841 const AllocaPartitioning::MemTransferOffsets &MTO 02842 = P.getMemTransferOffsets(II); 02843 02844 // Compute the relative offset within the transfer. 02845 unsigned IntPtrWidth = TD.getPointerSizeInBits(); 02846 APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin 02847 : MTO.SourceBegin)); 02848 02849 unsigned Align = II.getAlignment(); 02850 if (Align > 1) 02851 Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(), 02852 MinAlign(II.getAlignment(), getPartitionAlign())); 02853 02854 // For unsplit intrinsics, we simply modify the source and destination 02855 // pointers in place. This isn't just an optimization, it is a matter of 02856 // correctness. With unsplit intrinsics we may be dealing with transfers 02857 // within a single alloca before SROA ran, or with transfers that have 02858 // a variable length. We may also be dealing with memmove instead of 02859 // memcpy, and so simply updating the pointers is the necessary for us to 02860 // update both source and dest of a single call. 02861 if (!MTO.IsSplittable) { 02862 Value *OldOp = IsDest ? II.getRawDest() : II.getRawSource(); 02863 if (IsDest) 02864 II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType())); 02865 else 02866 II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType())); 02867 02868 Type *CstTy = II.getAlignmentCst()->getType(); 02869 II.setAlignment(ConstantInt::get(CstTy, Align)); 02870 02871 DEBUG(dbgs() << " to: " << II << "\n"); 02872 deleteIfTriviallyDead(OldOp); 02873 return false; 02874 } 02875 // For split transfer intrinsics we have an incredibly useful assurance: 02876 // the source and destination do not reside within the same alloca, and at 02877 // least one of them does not escape. This means that we can replace 02878 // memmove with memcpy, and we don't need to worry about all manner of 02879 // downsides to splitting and transforming the operations. 02880 02881 // If this doesn't map cleanly onto the alloca type, and that type isn't 02882 // a single value type, just emit a memcpy. 02883 bool EmitMemCpy 02884 = !VecTy && !IntTy && (BeginOffset != NewAllocaBeginOffset || 02885 EndOffset != NewAllocaEndOffset || 02886 !NewAI.getAllocatedType()->isSingleValueType()); 02887 02888 // If we're just going to emit a memcpy, the alloca hasn't changed, and the 02889 // size hasn't been shrunk based on analysis of the viable range, this is 02890 // a no-op. 02891 if (EmitMemCpy && &OldAI == &NewAI) { 02892 uint64_t OrigBegin = IsDest ? MTO.DestBegin : MTO.SourceBegin; 02893 uint64_t OrigEnd = IsDest ? MTO.DestEnd : MTO.SourceEnd; 02894 // Ensure the start lines up. 02895 assert(BeginOffset == OrigBegin); 02896 (void)OrigBegin; 02897 02898 // Rewrite the size as needed. 02899 if (EndOffset != OrigEnd) 02900 II.setLength(ConstantInt::get(II.getLength()->getType(), 02901 EndOffset - BeginOffset)); 02902 return false; 02903 } 02904 // Record this instruction for deletion. 02905 Pass.DeadInsts.insert(&II); 02906 02907 // Strip all inbounds GEPs and pointer casts to try to dig out any root 02908 // alloca that should be re-examined after rewriting this instruction. 02909 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); 02910 if (AllocaInst *AI 02911 = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) 02912 Pass.Worklist.insert(AI); 02913 02914 if (EmitMemCpy) { 02915 Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() 02916 : II.getRawDest()->getType(); 02917 02918 // Compute the other pointer, folding as much as possible to produce 02919 // a single, simple GEP in most cases. 02920 OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy); 02921 02922 Value *OurPtr 02923 = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType() 02924 : II.getRawSource()->getType()); 02925 Type *SizeTy = II.getLength()->getType(); 02926 Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); 02927 02928 CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr, 02929 IsDest ? OtherPtr : OurPtr, 02930 Size, Align, II.isVolatile()); 02931 (void)New; 02932 DEBUG(dbgs() << " to: " << *New << "\n"); 02933 return false; 02934 } 02935 02936 // Note that we clamp the alignment to 1 here as a 0 alignment for a memcpy 02937 // is equivalent to 1, but that isn't true if we end up rewriting this as 02938 // a load or store. 02939 if (!Align) 02940 Align = 1; 02941 02942 bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && 02943 EndOffset == NewAllocaEndOffset; 02944 uint64_t Size = EndOffset - BeginOffset; 02945 unsigned BeginIndex = VecTy ? getIndex(BeginOffset) : 0; 02946 unsigned EndIndex = VecTy ? getIndex(EndOffset) : 0; 02947 unsigned NumElements = EndIndex - BeginIndex; 02948 IntegerType *SubIntTy 02949 = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; 02950 02951 Type *OtherPtrTy = NewAI.getType(); 02952 if (VecTy && !IsWholeAlloca) { 02953 if (NumElements == 1) 02954 OtherPtrTy = VecTy->getElementType(); 02955 else 02956 OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements); 02957 02958 OtherPtrTy = OtherPtrTy->getPointerTo(); 02959 } else if (IntTy && !IsWholeAlloca) { 02960 OtherPtrTy = SubIntTy->getPointerTo(); 02961 } 02962 02963 Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy); 02964 Value *DstPtr = &NewAI; 02965 if (!IsDest) 02966 std::swap(SrcPtr, DstPtr); 02967 02968 Value *Src; 02969 if (VecTy && !IsWholeAlloca && !IsDest) { 02970 Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 02971 "load"); 02972 Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec"); 02973 } else if (IntTy && !IsWholeAlloca && !IsDest) { 02974 Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 02975 "load"); 02976 Src = convertValue(TD, IRB, Src, IntTy); 02977 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); 02978 uint64_t Offset = BeginOffset - NewAllocaBeginOffset; 02979 Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract"); 02980 } else { 02981 Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(), 02982 "copyload"); 02983 } 02984 02985 if (VecTy && !IsWholeAlloca && IsDest) { 02986 Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 02987 "oldload"); 02988 Src = insertVector(IRB, Old, Src, BeginIndex, "vec"); 02989 } else if (IntTy && !IsWholeAlloca && IsDest) { 02990 Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 02991 "oldload"); 02992 Old = convertValue(TD, IRB, Old, IntTy); 02993 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); 02994 uint64_t Offset = BeginOffset - NewAllocaBeginOffset; 02995 Src = insertInteger(TD, IRB, Old, Src, Offset, "insert"); 02996 Src = convertValue(TD, IRB, Src, NewAllocaTy); 02997 } 02998 02999 StoreInst *Store = cast<StoreInst>( 03000 IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile())); 03001 (void)Store; 03002 DEBUG(dbgs() << " to: " << *Store << "\n"); 03003 return !II.isVolatile(); 03004 } 03005 03006 bool visitIntrinsicInst(IntrinsicInst &II) { 03007 assert(II.getIntrinsicID() == Intrinsic::lifetime_start || 03008 II.getIntrinsicID() == Intrinsic::lifetime_end); 03009 DEBUG(dbgs() << " original: " << II << "\n"); 03010 assert(II.getArgOperand(1) == OldPtr); 03011 03012 // Record this instruction for deletion. 03013 Pass.DeadInsts.insert(&II); 03014 03015 ConstantInt *Size 03016 = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()), 03017 EndOffset - BeginOffset); 03018 Value *Ptr = getAdjustedAllocaPtr(IRB, II.getArgOperand(1)->getType()); 03019 Value *New; 03020 if (II.getIntrinsicID() == Intrinsic::lifetime_start) 03021 New = IRB.CreateLifetimeStart(Ptr, Size); 03022 else 03023 New = IRB.CreateLifetimeEnd(Ptr, Size); 03024 03025 (void)New; 03026 DEBUG(dbgs() << " to: " << *New << "\n"); 03027 return true; 03028 } 03029 03030 bool visitPHINode(PHINode &PN) { 03031 DEBUG(dbgs() << " original: " << PN << "\n"); 03032 03033 // We would like to compute a new pointer in only one place, but have it be 03034 // as local as possible to the PHI. To do that, we re-use the location of 03035 // the old pointer, which necessarily must be in the right position to 03036 // dominate the PHI. 03037 IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr)); 03038 PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + 03039 "."); 03040 03041 Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType()); 03042 // Replace the operands which were using the old pointer. 03043 std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr); 03044 03045 DEBUG(dbgs() << " to: " << PN << "\n"); 03046 deleteIfTriviallyDead(OldPtr); 03047 return false; 03048 } 03049 03050 bool visitSelectInst(SelectInst &SI) { 03051 DEBUG(dbgs() << " original: " << SI << "\n"); 03052 assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) && 03053 "Pointer isn't an operand!"); 03054 03055 Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType()); 03056 // Replace the operands which were using the old pointer. 03057 if (SI.getOperand(1) == OldPtr) 03058 SI.setOperand(1, NewPtr); 03059 if (SI.getOperand(2) == OldPtr) 03060 SI.setOperand(2, NewPtr); 03061 03062 DEBUG(dbgs() << " to: " << SI << "\n"); 03063 deleteIfTriviallyDead(OldPtr); 03064 return false; 03065 } 03066 03067 }; 03068 } 03069 03070 namespace { 03071 /// \brief Visitor to rewrite aggregate loads and stores as scalar. 03072 /// 03073 /// This pass aggressively rewrites all aggregate loads and stores on 03074 /// a particular pointer (or any pointer derived from it which we can identify) 03075 /// with scalar loads and stores. 03076 class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> { 03077 // Befriend the base class so it can delegate to private visit methods. 03078 friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>; 03079 03080 const DataLayout &TD; 03081 03082 /// Queue of pointer uses to analyze and potentially rewrite. 03083 SmallVector<Use *, 8> Queue; 03084 03085 /// Set to prevent us from cycling with phi nodes and loops. 03086 SmallPtrSet<User *, 8> Visited; 03087 03088 /// The current pointer use being rewritten. This is used to dig up the used 03089 /// value (as opposed to the user). 03090 Use *U; 03091 03092 public: 03093 AggLoadStoreRewriter(const DataLayout &TD) : TD(TD) {} 03094 03095 /// Rewrite loads and stores through a pointer and all pointers derived from 03096 /// it. 03097 bool rewrite(Instruction &I) { 03098 DEBUG(dbgs() << " Rewriting FCA loads and stores...\n"); 03099 enqueueUsers(I); 03100 bool Changed = false; 03101 while (!Queue.empty()) { 03102 U = Queue.pop_back_val(); 03103 Changed |= visit(cast<Instruction>(U->getUser())); 03104 } 03105 return Changed; 03106 } 03107 03108 private: 03109 /// Enqueue all the users of the given instruction for further processing. 03110 /// This uses a set to de-duplicate users. 03111 void enqueueUsers(Instruction &I) { 03112 for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; 03113 ++UI) 03114 if (Visited.insert(*UI)) 03115 Queue.push_back(&UI.getUse()); 03116 } 03117 03118 // Conservative default is to not rewrite anything. 03119 bool visitInstruction(Instruction &I) { return false; } 03120 03121 /// \brief Generic recursive split emission class. 03122 template <typename Derived> 03123 class OpSplitter { 03124 protected: 03125 /// The builder used to form new instructions. 03126 IRBuilderTy IRB; 03127 /// The indices which to be used with insert- or extractvalue to select the 03128 /// appropriate value within the aggregate. 03129 SmallVector<unsigned, 4> Indices; 03130 /// The indices to a GEP instruction which will move Ptr to the correct slot 03131 /// within the aggregate. 03132 SmallVector<Value *, 4> GEPIndices; 03133 /// The base pointer of the original op, used as a base for GEPing the 03134 /// split operations. 03135 Value *Ptr; 03136 03137 /// Initialize the splitter with an insertion point, Ptr and start with a 03138 /// single zero GEP index. 03139 OpSplitter(Instruction *InsertionPoint, Value *Ptr) 03140 : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {} 03141 03142 public: 03143 /// \brief Generic recursive split emission routine. 03144 /// 03145 /// This method recursively splits an aggregate op (load or store) into 03146 /// scalar or vector ops. It splits recursively until it hits a single value 03147 /// and emits that single value operation via the template argument. 03148 /// 03149 /// The logic of this routine relies on GEPs and insertvalue and 03150 /// extractvalue all operating with the same fundamental index list, merely 03151 /// formatted differently (GEPs need actual values). 03152 /// 03153 /// \param Ty The type being split recursively into smaller ops. 03154 /// \param Agg The aggregate value being built up or stored, depending on 03155 /// whether this is splitting a load or a store respectively. 03156 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) { 03157 if (Ty->isSingleValueType()) 03158 return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name); 03159 03160 if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 03161 unsigned OldSize = Indices.size(); 03162 (void)OldSize; 03163 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size; 03164 ++Idx) { 03165 assert(Indices.size() == OldSize && "Did not return to the old size"); 03166 Indices.push_back(Idx); 03167 GEPIndices.push_back(IRB.getInt32(Idx)); 03168 emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx)); 03169 GEPIndices.pop_back(); 03170 Indices.pop_back(); 03171 } 03172 return; 03173 } 03174 03175 if (StructType *STy = dyn_cast<StructType>(Ty)) { 03176 unsigned OldSize = Indices.size(); 03177 (void)OldSize; 03178 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size; 03179 ++Idx) { 03180 assert(Indices.size() == OldSize && "Did not return to the old size"); 03181 Indices.push_back(Idx); 03182 GEPIndices.push_back(IRB.getInt32(Idx)); 03183 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx)); 03184 GEPIndices.pop_back(); 03185 Indices.pop_back(); 03186 } 03187 return; 03188 } 03189 03190 llvm_unreachable("Only arrays and structs are aggregate loadable types"); 03191 } 03192 }; 03193 03194 struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> { 03195 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr) 03196 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr) {} 03197 03198 /// Emit a leaf load of a single value. This is called at the leaves of the 03199 /// recursive emission to actually load values. 03200 void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { 03201 assert(Ty->isSingleValueType()); 03202 // Load the single value and insert it using the indices. 03203 Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"); 03204 Value *Load = IRB.CreateLoad(GEP, Name + ".load"); 03205 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); 03206 DEBUG(dbgs() << " to: " << *Load << "\n"); 03207 } 03208 }; 03209 03210 bool visitLoadInst(LoadInst &LI) { 03211 assert(LI.getPointerOperand() == *U); 03212 if (!LI.isSimple() || LI.getType()->isSingleValueType()) 03213 return false; 03214 03215 // We have an aggregate being loaded, split it apart. 03216 DEBUG(dbgs() << " original: " << LI << "\n"); 03217 LoadOpSplitter Splitter(&LI, *U); 03218 Value *V = UndefValue::get(LI.getType()); 03219 Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca"); 03220 LI.replaceAllUsesWith(V); 03221 LI.eraseFromParent(); 03222 return true; 03223 } 03224 03225 struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> { 03226 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr) 03227 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr) {} 03228 03229 /// Emit a leaf store of a single value. This is called at the leaves of the 03230 /// recursive emission to actually produce stores. 03231 void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { 03232 assert(Ty->isSingleValueType()); 03233 // Extract the single value and store it using the indices. 03234 Value *Store = IRB.CreateStore( 03235 IRB.CreateExtractValue(Agg, Indices, Name + ".extract"), 03236 IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep")); 03237 (void)Store; 03238 DEBUG(dbgs() << " to: " << *Store << "\n"); 03239 } 03240 }; 03241 03242 bool visitStoreInst(StoreInst &SI) { 03243 if (!SI.isSimple() || SI.getPointerOperand() != *U) 03244 return false; 03245 Value *V = SI.getValueOperand(); 03246 if (V->getType()->isSingleValueType()) 03247 return false; 03248 03249 // We have an aggregate being stored, split it apart. 03250 DEBUG(dbgs() << " original: " << SI << "\n"); 03251 StoreOpSplitter Splitter(&SI, *U); 03252 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca"); 03253 SI.eraseFromParent(); 03254 return true; 03255 } 03256 03257 bool visitBitCastInst(BitCastInst &BC) { 03258 enqueueUsers(BC); 03259 return false; 03260 } 03261 03262 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { 03263 enqueueUsers(GEPI); 03264 return false; 03265 } 03266 03267 bool visitPHINode(PHINode &PN) { 03268 enqueueUsers(PN); 03269 return false; 03270 } 03271 03272 bool visitSelectInst(SelectInst &SI) { 03273 enqueueUsers(SI); 03274 return false; 03275 } 03276 }; 03277 } 03278 03279 /// \brief Strip aggregate type wrapping. 03280 /// 03281 /// This removes no-op aggregate types wrapping an underlying type. It will 03282 /// strip as many layers of types as it can without changing either the type 03283 /// size or the allocated size. 03284 static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) { 03285 if (Ty->isSingleValueType()) 03286 return Ty; 03287 03288 uint64_t AllocSize = DL.getTypeAllocSize(Ty); 03289 uint64_t TypeSize = DL.getTypeSizeInBits(Ty); 03290 03291 Type *InnerTy; 03292 if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { 03293 InnerTy = ArrTy->getElementType(); 03294 } else if (StructType *STy = dyn_cast<StructType>(Ty)) { 03295 const StructLayout *SL = DL.getStructLayout(STy); 03296 unsigned Index = SL->getElementContainingOffset(0); 03297 InnerTy = STy->getElementType(Index); 03298 } else { 03299 return Ty; 03300 } 03301 03302 if (AllocSize > DL.getTypeAllocSize(InnerTy) || 03303 TypeSize > DL.getTypeSizeInBits(InnerTy)) 03304 return Ty; 03305 03306 return stripAggregateTypeWrapping(DL, InnerTy); 03307 } 03308 03309 /// \brief Try to find a partition of the aggregate type passed in for a given 03310 /// offset and size. 03311 /// 03312 /// This recurses through the aggregate type and tries to compute a subtype 03313 /// based on the offset and size. When the offset and size span a sub-section 03314 /// of an array, it will even compute a new array type for that sub-section, 03315 /// and the same for structs. 03316 /// 03317 /// Note that this routine is very strict and tries to find a partition of the 03318 /// type which produces the *exact* right offset and size. It is not forgiving 03319 /// when the size or offset cause either end of type-based partition to be off. 03320 /// Also, this is a best-effort routine. It is reasonable to give up and not 03321 /// return a type if necessary. 03322 static Type *getTypePartition(const DataLayout &TD, Type *Ty, 03323 uint64_t Offset, uint64_t Size) { 03324 if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size) 03325 return stripAggregateTypeWrapping(TD, Ty); 03326 if (Offset > TD.getTypeAllocSize(Ty) || 03327 (TD.getTypeAllocSize(Ty) - Offset) < Size) 03328 return 0; 03329 03330 if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) { 03331 // We can't partition pointers... 03332 if (SeqTy->isPointerTy()) 03333 return 0; 03334 03335 Type *ElementTy = SeqTy->getElementType(); 03336 uint64_t ElementSize = TD.getTypeAllocSize(ElementTy); 03337 uint64_t NumSkippedElements = Offset / ElementSize; 03338 if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) { 03339 if (NumSkippedElements >= ArrTy->getNumElements()) 03340 return 0; 03341 } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) { 03342 if (NumSkippedElements >= VecTy->getNumElements()) 03343 return 0; 03344 } 03345 Offset -= NumSkippedElements * ElementSize; 03346 03347 // First check if we need to recurse. 03348 if (Offset > 0 || Size < ElementSize) { 03349 // Bail if the partition ends in a different array element. 03350 if ((Offset + Size) > ElementSize) 03351 return 0; 03352 // Recurse through the element type trying to peel off offset bytes. 03353 return getTypePartition(TD, ElementTy, Offset, Size); 03354 } 03355 assert(Offset == 0); 03356 03357 if (Size == ElementSize) 03358 return stripAggregateTypeWrapping(TD, ElementTy); 03359 assert(Size > ElementSize); 03360 uint64_t NumElements = Size / ElementSize; 03361 if (NumElements * ElementSize != Size) 03362 return 0; 03363 return ArrayType::get(ElementTy, NumElements); 03364 } 03365 03366 StructType *STy = dyn_cast<StructType>(Ty); 03367 if (!STy) 03368 return 0; 03369 03370 const StructLayout *SL = TD.getStructLayout(STy); 03371 if (Offset >= SL->getSizeInBytes()) 03372 return 0; 03373 uint64_t EndOffset = Offset + Size; 03374 if (EndOffset > SL->getSizeInBytes()) 03375 return 0; 03376 03377 unsigned Index = SL->getElementContainingOffset(Offset); 03378 Offset -= SL->getElementOffset(Index); 03379 03380 Type *ElementTy = STy->getElementType(Index); 03381 uint64_t ElementSize = TD.getTypeAllocSize(ElementTy); 03382 if (Offset >= ElementSize) 03383 return 0; // The offset points into alignment padding. 03384 03385 // See if any partition must be contained by the element. 03386 if (Offset > 0 || Size < ElementSize) { 03387 if ((Offset + Size) > ElementSize) 03388 return 0; 03389 return getTypePartition(TD, ElementTy, Offset, Size); 03390 } 03391 assert(Offset == 0); 03392 03393 if (Size == ElementSize) 03394 return stripAggregateTypeWrapping(TD, ElementTy); 03395 03396 StructType::element_iterator EI = STy->element_begin() + Index, 03397 EE = STy->element_end(); 03398 if (EndOffset < SL->getSizeInBytes()) { 03399 unsigned EndIndex = SL->getElementContainingOffset(EndOffset); 03400 if (Index == EndIndex) 03401 return 0; // Within a single element and its padding. 03402 03403 // Don't try to form "natural" types if the elements don't line up with the 03404 // expected size. 03405 // FIXME: We could potentially recurse down through the last element in the 03406 // sub-struct to find a natural end point. 03407 if (SL->getElementOffset(EndIndex) != EndOffset) 03408 return 0; 03409 03410 assert(Index < EndIndex); 03411 EE = STy->element_begin() + EndIndex; 03412 } 03413 03414 // Try to build up a sub-structure. 03415 StructType *SubTy = StructType::get(STy->getContext(), makeArrayRef(EI, EE), 03416 STy->isPacked()); 03417 const StructLayout *SubSL = TD.getStructLayout(SubTy); 03418 if (Size != SubSL->getSizeInBytes()) 03419 return 0; // The sub-struct doesn't have quite the size needed. 03420 03421 return SubTy; 03422 } 03423 03424 /// \brief Rewrite an alloca partition's users. 03425 /// 03426 /// This routine drives both of the rewriting goals of the SROA pass. It tries 03427 /// to rewrite uses of an alloca partition to be conducive for SSA value 03428 /// promotion. If the partition needs a new, more refined alloca, this will 03429 /// build that new alloca, preserving as much type information as possible, and 03430 /// rewrite the uses of the old alloca to point at the new one and have the 03431 /// appropriate new offsets. It also evaluates how successful the rewrite was 03432 /// at enabling promotion and if it was successful queues the alloca to be 03433 /// promoted. 03434 bool SROA::rewriteAllocaPartition(AllocaInst &AI, 03435 AllocaPartitioning &P, 03436 AllocaPartitioning::iterator PI) { 03437 uint64_t AllocaSize = PI->EndOffset - PI->BeginOffset; 03438 bool IsLive = false; 03439 for (AllocaPartitioning::use_iterator UI = P.use_begin(PI), 03440 UE = P.use_end(PI); 03441 UI != UE && !IsLive; ++UI) 03442 if (UI->getUse()) 03443 IsLive = true; 03444 if (!IsLive) 03445 return false; // No live uses left of this partition. 03446 03447 DEBUG(dbgs() << "Speculating PHIs and selects in partition " 03448 << "[" << PI->BeginOffset << "," << PI->EndOffset << ")\n"); 03449 03450 PHIOrSelectSpeculator Speculator(*TD, P, *this); 03451 DEBUG(dbgs() << " speculating "); 03452 DEBUG(P.print(dbgs(), PI, "")); 03453 Speculator.visitUsers(PI); 03454 03455 // Try to compute a friendly type for this partition of the alloca. This 03456 // won't always succeed, in which case we fall back to a legal integer type 03457 // or an i8 array of an appropriate size. 03458 Type *AllocaTy = 0; 03459 if (Type *PartitionTy = P.getCommonType(PI)) 03460 if (TD->getTypeAllocSize(PartitionTy) >= AllocaSize) 03461 AllocaTy = PartitionTy; 03462 if (!AllocaTy) 03463 if (Type *PartitionTy = getTypePartition(*TD, AI.getAllocatedType(), 03464 PI->BeginOffset, AllocaSize)) 03465 AllocaTy = PartitionTy; 03466 if ((!AllocaTy || 03467 (AllocaTy->isArrayTy() && 03468 AllocaTy->getArrayElementType()->isIntegerTy())) && 03469 TD->isLegalInteger(AllocaSize * 8)) 03470 AllocaTy = Type::getIntNTy(*C, AllocaSize * 8); 03471 if (!AllocaTy) 03472 AllocaTy = ArrayType::get(Type::getInt8Ty(*C), AllocaSize); 03473 assert(TD->getTypeAllocSize(AllocaTy) >= AllocaSize); 03474 03475 // Check for the case where we're going to rewrite to a new alloca of the 03476 // exact same type as the original, and with the same access offsets. In that 03477 // case, re-use the existing alloca, but still run through the rewriter to 03478 // perform phi and select speculation. 03479 AllocaInst *NewAI; 03480 if (AllocaTy == AI.getAllocatedType()) { 03481 assert(PI->BeginOffset == 0 && 03482 "Non-zero begin offset but same alloca type"); 03483 assert(PI == P.begin() && "Begin offset is zero on later partition"); 03484 NewAI = &AI; 03485 } else { 03486 unsigned Alignment = AI.getAlignment(); 03487 if (!Alignment) { 03488 // The minimum alignment which users can rely on when the explicit 03489 // alignment is omitted or zero is that required by the ABI for this 03490 // type. 03491 Alignment = TD->getABITypeAlignment(AI.getAllocatedType()); 03492 } 03493 Alignment = MinAlign(Alignment, PI->BeginOffset); 03494 // If we will get at least this much alignment from the type alone, leave 03495 // the alloca's alignment unconstrained. 03496 if (Alignment <= TD->getABITypeAlignment(AllocaTy)) 03497 Alignment = 0; 03498 NewAI = new AllocaInst(AllocaTy, 0, Alignment, 03499 AI.getName() + ".sroa." + Twine(PI - P.begin()), 03500 &AI); 03501 ++NumNewAllocas; 03502 } 03503 03504 DEBUG(dbgs() << "Rewriting alloca partition " 03505 << "[" << PI->BeginOffset << "," << PI->EndOffset << ") to: " 03506 << *NewAI << "\n"); 03507 03508 // Track the high watermark of the post-promotion worklist. We will reset it 03509 // to this point if the alloca is not in fact scheduled for promotion. 03510 unsigned PPWOldSize = PostPromotionWorklist.size(); 03511 03512 AllocaPartitionRewriter Rewriter(*TD, P, PI, *this, AI, *NewAI, 03513 PI->BeginOffset, PI->EndOffset); 03514 DEBUG(dbgs() << " rewriting "); 03515 DEBUG(P.print(dbgs(), PI, "")); 03516 bool Promotable = Rewriter.visitUsers(P.use_begin(PI), P.use_end(PI)); 03517 if (Promotable) { 03518 DEBUG(dbgs() << " and queuing for promotion\n"); 03519 PromotableAllocas.push_back(NewAI); 03520 } else if (NewAI != &AI) { 03521 // If we can't promote the alloca, iterate on it to check for new 03522 // refinements exposed by splitting the current alloca. Don't iterate on an 03523 // alloca which didn't actually change and didn't get promoted. 03524 Worklist.insert(NewAI); 03525 } 03526 03527 // Drop any post-promotion work items if promotion didn't happen. 03528 if (!Promotable) 03529 while (PostPromotionWorklist.size() > PPWOldSize) 03530 PostPromotionWorklist.pop_back(); 03531 03532 return true; 03533 } 03534 03535 /// \brief Walks the partitioning of an alloca rewriting uses of each partition. 03536 bool SROA::splitAlloca(AllocaInst &AI, AllocaPartitioning &P) { 03537 bool Changed = false; 03538 for (AllocaPartitioning::iterator PI = P.begin(), PE = P.end(); PI != PE; 03539 ++PI) 03540 Changed |= rewriteAllocaPartition(AI, P, PI); 03541 03542 return Changed; 03543 } 03544 03545 /// \brief Analyze an alloca for SROA. 03546 /// 03547 /// This analyzes the alloca to ensure we can reason about it, builds 03548 /// a partitioning of the alloca, and then hands it off to be split and 03549 /// rewritten as needed. 03550 bool SROA::runOnAlloca(AllocaInst &AI) { 03551 DEBUG(dbgs() << "SROA alloca: " << AI << "\n"); 03552 ++NumAllocasAnalyzed; 03553 03554 // Special case dead allocas, as they're trivial. 03555 if (AI.use_empty()) { 03556 AI.eraseFromParent(); 03557 return true; 03558 } 03559 03560 // Skip alloca forms that this analysis can't handle. 03561 if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() || 03562 TD->getTypeAllocSize(AI.getAllocatedType()) == 0) 03563 return false; 03564 03565 bool Changed = false; 03566 03567 // First, split any FCA loads and stores touching this alloca to promote 03568 // better splitting and promotion opportunities. 03569 AggLoadStoreRewriter AggRewriter(*TD); 03570 Changed |= AggRewriter.rewrite(AI); 03571 03572 // Build the partition set using a recursive instruction-visiting builder. 03573 AllocaPartitioning P(*TD, AI); 03574 DEBUG(P.print(dbgs())); 03575 if (P.isEscaped()) 03576 return Changed; 03577 03578 // Delete all the dead users of this alloca before splitting and rewriting it. 03579 for (AllocaPartitioning::dead_user_iterator DI = P.dead_user_begin(), 03580 DE = P.dead_user_end(); 03581 DI != DE; ++DI) { 03582 Changed = true; 03583 (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType())); 03584 DeadInsts.insert(*DI); 03585 } 03586 for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(), 03587 DE = P.dead_op_end(); 03588 DO != DE; ++DO) { 03589 Value *OldV = **DO; 03590 // Clobber the use with an undef value. 03591 **DO = UndefValue::get(OldV->getType()); 03592 if (Instruction *OldI = dyn_cast<Instruction>(OldV)) 03593 if (isInstructionTriviallyDead(OldI)) { 03594 Changed = true; 03595 DeadInsts.insert(OldI); 03596 } 03597 } 03598 03599 // No partitions to split. Leave the dead alloca for a later pass to clean up. 03600 if (P.begin() == P.end()) 03601 return Changed; 03602 03603 return splitAlloca(AI, P) || Changed; 03604 } 03605 03606 /// \brief Delete the dead instructions accumulated in this run. 03607 /// 03608 /// Recursively deletes the dead instructions we've accumulated. This is done 03609 /// at the very end to maximize locality of the recursive delete and to 03610 /// minimize the problems of invalidated instruction pointers as such pointers 03611 /// are used heavily in the intermediate stages of the algorithm. 03612 /// 03613 /// We also record the alloca instructions deleted here so that they aren't 03614 /// subsequently handed to mem2reg to promote. 03615 void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { 03616 while (!DeadInsts.empty()) { 03617 Instruction *I = DeadInsts.pop_back_val(); 03618 DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); 03619 03620 I->replaceAllUsesWith(UndefValue::get(I->getType())); 03621 03622 for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) 03623 if (Instruction *U = dyn_cast<Instruction>(*OI)) { 03624 // Zero out the operand and see if it becomes trivially dead. 03625 *OI = 0; 03626 if (isInstructionTriviallyDead(U)) 03627 DeadInsts.insert(U); 03628 } 03629 03630 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) 03631 DeletedAllocas.insert(AI); 03632 03633 ++NumDeleted; 03634 I->eraseFromParent(); 03635 } 03636 } 03637 03638 /// \brief Promote the allocas, using the best available technique. 03639 /// 03640 /// This attempts to promote whatever allocas have been identified as viable in 03641 /// the PromotableAllocas list. If that list is empty, there is nothing to do. 03642 /// If there is a domtree available, we attempt to promote using the full power 03643 /// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is 03644 /// based on the SSAUpdater utilities. This function returns whether any 03645 /// promotion occurred. 03646 bool SROA::promoteAllocas(Function &F) { 03647 if (PromotableAllocas.empty()) 03648 return false; 03649 03650 NumPromoted += PromotableAllocas.size(); 03651 03652 if (DT && !ForceSSAUpdater) { 03653 DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); 03654 PromoteMemToReg(PromotableAllocas, *DT); 03655 PromotableAllocas.clear(); 03656 return true; 03657 } 03658 03659 DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n"); 03660 SSAUpdater SSA; 03661 DIBuilder DIB(*F.getParent()); 03662 SmallVector<Instruction*, 64> Insts; 03663 03664 for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) { 03665 AllocaInst *AI = PromotableAllocas[Idx]; 03666 for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); 03667 UI != UE;) { 03668 Instruction *I = cast<Instruction>(*UI++); 03669 // FIXME: Currently the SSAUpdater infrastructure doesn't reason about 03670 // lifetime intrinsics and so we strip them (and the bitcasts+GEPs 03671 // leading to them) here. Eventually it should use them to optimize the 03672 // scalar values produced. 03673 if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) { 03674 assert(onlyUsedByLifetimeMarkers(I) && 03675 "Found a bitcast used outside of a lifetime marker."); 03676 while (!I->use_empty()) 03677 cast<Instruction>(*I->use_begin())->eraseFromParent(); 03678 I->eraseFromParent(); 03679 continue; 03680 } 03681 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 03682 assert(II->getIntrinsicID() == Intrinsic::lifetime_start || 03683 II->getIntrinsicID() == Intrinsic::lifetime_end); 03684 II->eraseFromParent(); 03685 continue; 03686 } 03687 03688 Insts.push_back(I); 03689 } 03690 AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts); 03691 Insts.clear(); 03692 } 03693 03694 PromotableAllocas.clear(); 03695 return true; 03696 } 03697 03698 namespace { 03699 /// \brief A predicate to test whether an alloca belongs to a set. 03700 class IsAllocaInSet { 03701 typedef SmallPtrSet<AllocaInst *, 4> SetType; 03702 const SetType &Set; 03703 03704 public: 03705 typedef AllocaInst *argument_type; 03706 03707 IsAllocaInSet(const SetType &Set) : Set(Set) {} 03708 bool operator()(AllocaInst *AI) const { return Set.count(AI); } 03709 }; 03710 } 03711 03712 bool SROA::runOnFunction(Function &F) { 03713 DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); 03714 C = &F.getContext(); 03715 TD = getAnalysisIfAvailable<DataLayout>(); 03716 if (!TD) { 03717 DEBUG(dbgs() << " Skipping SROA -- no target data!\n"); 03718 return false; 03719 } 03720 DT = getAnalysisIfAvailable<DominatorTree>(); 03721 03722 BasicBlock &EntryBB = F.getEntryBlock(); 03723 for (BasicBlock::iterator I = EntryBB.begin(), E = llvm::prior(EntryBB.end()); 03724 I != E; ++I) 03725 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) 03726 Worklist.insert(AI); 03727 03728 bool Changed = false; 03729 // A set of deleted alloca instruction pointers which should be removed from 03730 // the list of promotable allocas. 03731 SmallPtrSet<AllocaInst *, 4> DeletedAllocas; 03732 03733 do { 03734 while (!Worklist.empty()) { 03735 Changed |= runOnAlloca(*Worklist.pop_back_val()); 03736 deleteDeadInstructions(DeletedAllocas); 03737 03738 // Remove the deleted allocas from various lists so that we don't try to 03739 // continue processing them. 03740 if (!DeletedAllocas.empty()) { 03741 Worklist.remove_if(IsAllocaInSet(DeletedAllocas)); 03742 PostPromotionWorklist.remove_if(IsAllocaInSet(DeletedAllocas)); 03743 PromotableAllocas.erase(std::remove_if(PromotableAllocas.begin(), 03744 PromotableAllocas.end(), 03745 IsAllocaInSet(DeletedAllocas)), 03746 PromotableAllocas.end()); 03747 DeletedAllocas.clear(); 03748 } 03749 } 03750 03751 Changed |= promoteAllocas(F); 03752 03753 Worklist = PostPromotionWorklist; 03754 PostPromotionWorklist.clear(); 03755 } while (!Worklist.empty()); 03756 03757 return Changed; 03758 } 03759 03760 void SROA::getAnalysisUsage(AnalysisUsage &AU) const { 03761 if (RequiresDomTree) 03762 AU.addRequired<DominatorTree>(); 03763 AU.setPreservesCFG(); 03764 }