00001 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the visit functions for load, store and alloca.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "InstCombineInternal.h"
00015 #include "llvm/ADT/Statistic.h"
00016 #include "llvm/Analysis/Loads.h"
00017 #include "llvm/IR/DataLayout.h"
00018 #include "llvm/IR/LLVMContext.h"
00019 #include "llvm/IR/IntrinsicInst.h"
00020 #include "llvm/IR/MDBuilder.h"
00021 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
00022 #include "llvm/Transforms/Utils/Local.h"
00023 using namespace llvm;
00024 
00025 #define DEBUG_TYPE "instcombine"
00026 
00027 STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
00028 STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
00029 
00030 /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
00031 /// some part of a constant global variable.  This intentionally only accepts
00032 /// constant expressions because we can't rewrite arbitrary instructions.
00033 static bool pointsToConstantGlobal(Value *V) {
00034   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
00035     return GV->isConstant();
00036 
00037   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
00038     if (CE->getOpcode() == Instruction::BitCast ||
00039         CE->getOpcode() == Instruction::AddrSpaceCast ||
00040         CE->getOpcode() == Instruction::GetElementPtr)
00041       return pointsToConstantGlobal(CE->getOperand(0));
00042   }
00043   return false;
00044 }
00045 
00046 /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
00047 /// pointer to an alloca.  Ignore any reads of the pointer, return false if we
00048 /// see any stores or other unknown uses.  If we see pointer arithmetic, keep
00049 /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
00050 /// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
00051 /// the alloca, and if the source pointer is a pointer to a constant global, we
00052 /// can optimize this.
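      ///
      /// For example (illustrative IR; names and sizes are hypothetical), the
      /// pattern this looks for is:
      ///   %a = alloca [4 x i32]
      ///   call void @llvm.memcpy(... %a as i8* ..., ... @G as i8* ..., i64 16, ...)
      ///   ... only non-volatile loads of %a ...
      /// where @G is a constant global.  The caller can then rewrite the users of
      /// %a to read @G directly.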
00053 static bool
00054 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
00055                                SmallVectorImpl<Instruction *> &ToDelete) {
00056   // We track lifetime intrinsics as we encounter them.  If we decide to go
00057   // ahead and replace the value with the global, this lets the caller quickly
00058   // eliminate the markers.
00059 
00060   SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
00061   ValuesToInspect.push_back(std::make_pair(V, false));
00062   while (!ValuesToInspect.empty()) {
00063     auto ValuePair = ValuesToInspect.pop_back_val();
00064     const bool IsOffset = ValuePair.second;
00065     for (auto &U : ValuePair.first->uses()) {
00066       Instruction *I = cast<Instruction>(U.getUser());
00067 
00068       if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
00069         // Ignore simple (non-volatile, non-atomic) loads; they are always ok.
00070         if (!LI->isSimple()) return false;
00071         continue;
00072       }
00073 
00074       if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
00075         // If uses of the bitcast are ok, we are ok.
00076         ValuesToInspect.push_back(std::make_pair(I, IsOffset));
00077         continue;
00078       }
00079       if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
00080         // If the GEP has all zero indices, it doesn't offset the pointer;
00081         // otherwise it does.
00082         ValuesToInspect.push_back(
00083             std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
00084         continue;
00085       }
00086 
00087       if (auto CS = CallSite(I)) {
00088         // If this is the function being called then we treat it like a load and
00089         // ignore it.
00090         if (CS.isCallee(&U))
00091           continue;
00092 
00093         // Inalloca arguments are clobbered by the call.
00094         unsigned ArgNo = CS.getArgumentNo(&U);
00095         if (CS.isInAllocaArgument(ArgNo))
00096           return false;
00097 
00098         // If this is a readonly/readnone call site, then we know it is just a
00099         // load (but one that potentially returns the value itself), so we can
00100         // ignore it if we know that the value isn't captured.
00101         if (CS.onlyReadsMemory() &&
00102             (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
00103           continue;
00104 
00105         // If this is being passed as a byval argument, the caller is making a
00106         // copy, so it is only a read of the alloca.
00107         if (CS.isByValArgument(ArgNo))
00108           continue;
00109       }
00110 
00111       // Lifetime intrinsics can be handled by the caller.
00112       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
00113         if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
00114             II->getIntrinsicID() == Intrinsic::lifetime_end) {
00115           assert(II->use_empty() && "Lifetime markers have no result to use!");
00116           ToDelete.push_back(II);
00117           continue;
00118         }
00119       }
00120 
00121       // If this isn't our memcpy/memmove, reject it as something we can't
00122       // handle.
00123       MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
00124       if (!MI)
00125         return false;
00126 
00127       // If the transfer is using the alloca as its source, then ignore it since
00128       // it is only a read of the alloca (unless the transfer is volatile).
00129       if (U.getOperandNo() == 1) {
00130         if (MI->isVolatile()) return false;
00131         continue;
00132       }
00133 
00134       // If we already have seen a copy, reject the second one.
00135       if (TheCopy) return false;
00136 
00137       // If the pointer has been offset from the start of the alloca, we can't
00138       // safely handle this.
00139       if (IsOffset) return false;
00140 
00141       // If the memintrinsic isn't using the alloca as the dest, reject it.
00142       if (U.getOperandNo() != 0) return false;
00143 
00144       // If the source of the memcpy/move is not a constant global, reject it.
00145       if (!pointsToConstantGlobal(MI->getSource()))
00146         return false;
00147 
00148       // Otherwise, the transform is safe.  Remember the copy instruction.
00149       TheCopy = MI;
00150     }
00151   }
00152   return true;
00153 }
00154 
00155 /// isOnlyCopiedFromConstantGlobal - Return the copy if the specified alloca is
00156 /// only modified by a copy from a constant global.  If we can prove this, we
00157 /// can replace any uses of the alloca with uses of the global directly.
00158 static MemTransferInst *
00159 isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
00160                                SmallVectorImpl<Instruction *> &ToDelete) {
00161   MemTransferInst *TheCopy = nullptr;
00162   if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
00163     return TheCopy;
00164   return nullptr;
00165 }
00166 
00167 static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
00168   // Check for array size of 1 (scalar allocation).
00169   if (!AI.isArrayAllocation()) {
00170     // i32 1 is the canonical array size for scalar allocations.
00171     if (AI.getArraySize()->getType()->isIntegerTy(32))
00172       return nullptr;
00173 
00174     // Canonicalize it.
00175     Value *V = IC.Builder->getInt32(1);
00176     AI.setOperand(0, V);
00177     return &AI;
00178   }
00179 
00180   // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
00181   if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
00182     Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
00183     AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
00184     New->setAlignment(AI.getAlignment());
00185 
00186     // Scan to the end of the allocation instructions, to skip over a block of
00187     // allocas if possible... also skip interleaved debug info.
00188     //
00189     BasicBlock::iterator It = New;
00190     while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
00191       ++It;
00192 
00193     // Now that It is pointing to the first non-allocation-inst in the block,
00194     // insert our getelementptr instruction...
00195     //
00196     Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
00197     Value *NullIdx = Constant::getNullValue(IdxTy);
00198     Value *Idx[2] = {NullIdx, NullIdx};
00199     Instruction *GEP =
00200         GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
00201     IC.InsertNewInstBefore(GEP, *It);
00202 
00203     // Now make everything use the getelementptr instead of the original
00204     // allocation.
00205     return IC.ReplaceInstUsesWith(AI, GEP);
00206   }
00207 
00208   if (isa<UndefValue>(AI.getArraySize()))
00209     return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
00210 
00211   // Ensure that the alloca array size argument has type intptr_t, so that
00212   // any casting is exposed early.
00213   Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
00214   if (AI.getArraySize()->getType() != IntPtrTy) {
00215     Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
00216     AI.setOperand(0, V);
00217     return &AI;
00218   }
00219 
00220   return nullptr;
00221 }
00222 
00223 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
00224   if (auto *I = simplifyAllocaArraySize(*this, AI))
00225     return I;
00226 
00227   if (AI.getAllocatedType()->isSized()) {
00228     // If the alignment is 0 (unspecified), assign it the preferred alignment.
00229     if (AI.getAlignment() == 0)
00230       AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
00231 
00232     // Move all allocas of zero-byte objects to the entry block and merge them
00233     // together.  Note that we only do this for allocas, because malloc should
00234     // allocate and return a unique pointer, even for a zero-byte allocation.
00235     if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
00236       // For a zero-sized alloca there is no point in doing an array allocation.
00237       // This is helpful if the array size is a complicated expression not used
00238       // elsewhere.
00239       if (AI.isArrayAllocation()) {
00240         AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
00241         return &AI;
00242       }
00243 
00244       // Get the first instruction in the entry block.
00245       BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
00246       Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
00247       if (FirstInst != &AI) {
00248         // If the entry block doesn't start with a zero-size alloca then move
00249         // this one to the start of the entry block.  There is no problem with
00250         // dominance as the array size was forced to a constant earlier already.
00251         AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
00252         if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
00253             DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
00254           AI.moveBefore(FirstInst);
00255           return &AI;
00256         }
00257 
00258         // If the alignment of the entry block alloca is 0 (unspecified),
00259         // assign it the preferred alignment.
00260         if (EntryAI->getAlignment() == 0)
00261           EntryAI->setAlignment(
00262               DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
00263         // Replace this zero-sized alloca with the one at the start of the entry
00264         // block after ensuring that the address will be aligned enough for both
00265         // types.
00266         unsigned MaxAlign = std::max(EntryAI->getAlignment(),
00267                                      AI.getAlignment());
00268         EntryAI->setAlignment(MaxAlign);
00269         if (AI.getType() != EntryAI->getType())
00270           return new BitCastInst(EntryAI, AI.getType());
00271         return ReplaceInstUsesWith(AI, EntryAI);
00272       }
00273     }
00274   }
00275 
00276   if (AI.getAlignment()) {
00277     // Check to see if this allocation is only modified by a memcpy/memmove from
00278     // a constant global whose alignment is equal to or exceeds that of the
00279     // allocation.  If this is the case, we can change all users to use
00280     // the constant global instead.  This is commonly produced by the CFE by
00281     // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
00282     // is only subsequently read.
00283     SmallVector<Instruction *, 4> ToDelete;
00284     if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
00285       unsigned SourceAlign = getOrEnforceKnownAlignment(
00286           Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
00287       if (AI.getAlignment() <= SourceAlign) {
00288         DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
00289         DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
00290         for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
00291           EraseInstFromFunction(*ToDelete[i]);
00292         Constant *TheSrc = cast<Constant>(Copy->getSource());
00293         Constant *Cast
00294           = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
00295         Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
00296         EraseInstFromFunction(*Copy);
00297         ++NumGlobalCopies;
00298         return NewI;
00299       }
00300     }
00301   }
00302 
00303   // Finally, use the generic allocation site handler to aggressively remove
00304   // unused allocas.
00305   return visitAllocSite(AI);
00306 }
00307 
00308 /// \brief Helper to combine a load to a new type.
00309 ///
00310 /// This just does the work of combining a load to a new type. It handles
00311 /// metadata, etc., and returns the new instruction. The \c NewTy should be the
00312 /// loaded *value* type. This will convert it to a pointer, cast the operand to
00313 /// that pointer type, load it, etc.
00314 ///
00315 /// Note that this will create all of the instructions with whatever insert
00316 /// point the \c InstCombiner currently is using.
00317 static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
00318                                       const Twine &Suffix = "") {
00319   Value *Ptr = LI.getPointerOperand();
00320   unsigned AS = LI.getPointerAddressSpace();
00321   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
00322   LI.getAllMetadata(MD);
00323 
00324   LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
00325       IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
00326       LI.getAlignment(), LI.getName() + Suffix);
00327   MDBuilder MDB(NewLoad->getContext());
00328   for (const auto &MDPair : MD) {
00329     unsigned ID = MDPair.first;
00330     MDNode *N = MDPair.second;
00331     // Note, essentially every kind of metadata should be preserved here! This
00332     // routine is supposed to clone a load instruction changing *only its type*.
00333     // The only metadata it makes sense to drop is metadata which is invalidated
00334     // when the pointer type changes. This should essentially never be the case
00335     // in LLVM, but we explicitly switch over only known metadata to be
00336     // conservatively correct. If you are adding metadata to LLVM which pertains
00337     // to loads, you almost certainly want to add it here.
00338     switch (ID) {
00339     case LLVMContext::MD_dbg:
00340     case LLVMContext::MD_tbaa:
00341     case LLVMContext::MD_prof:
00342     case LLVMContext::MD_fpmath:
00343     case LLVMContext::MD_tbaa_struct:
00344     case LLVMContext::MD_invariant_load:
00345     case LLVMContext::MD_alias_scope:
00346     case LLVMContext::MD_noalias:
00347     case LLVMContext::MD_nontemporal:
00348     case LLVMContext::MD_mem_parallel_loop_access:
00349       // All of these directly apply.
00350       NewLoad->setMetadata(ID, N);
00351       break;
00352 
00353     case LLVMContext::MD_nonnull:
00354       // This only directly applies if the new type is also a pointer.
00355       if (NewTy->isPointerTy()) {
00356         NewLoad->setMetadata(ID, N);
00357         break;
00358       }
00359       // If it's integral now, translate it to !range metadata.
00360       if (NewTy->isIntegerTy()) {
00361         auto *ITy = cast<IntegerType>(NewTy);
00362         auto *NullInt = ConstantExpr::getPtrToInt(
00363             ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
00364         auto *NonNullInt =
00365             ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
00366         NewLoad->setMetadata(LLVMContext::MD_range,
00367                              MDB.createRange(NonNullInt, NullInt));
00368       }
00369       break;
00370 
00371     case LLVMContext::MD_range:
00372       // FIXME: It would be nice to propagate this in some way, but the type
00373       // conversions make it hard. If the new type is a pointer, we could
00374       // translate it to !nonnull metadata.
00375       break;
00376     }
00377   }
00378   return NewLoad;
00379 }
00380 
00381 /// \brief Combine a store to a new type.
00382 ///
00383 /// Returns the newly created store instruction.
00384 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
00385   Value *Ptr = SI.getPointerOperand();
00386   unsigned AS = SI.getPointerAddressSpace();
00387   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
00388   SI.getAllMetadata(MD);
00389 
00390   StoreInst *NewStore = IC.Builder->CreateAlignedStore(
00391       V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
00392       SI.getAlignment());
00393   for (const auto &MDPair : MD) {
00394     unsigned ID = MDPair.first;
00395     MDNode *N = MDPair.second;
00396     // Note, essentially every kind of metadata should be preserved here! This
00397     // routine is supposed to clone a store instruction changing *only its
00398     // type*. The only metadata it makes sense to drop is metadata which is
00399     // invalidated when the pointer type changes. This should essentially
00400     // never be the case in LLVM, but we explicitly switch over only known
00401     // metadata to be conservatively correct. If you are adding metadata to
00402     // LLVM which pertains to stores, you almost certainly want to add it
00403     // here.
00404     switch (ID) {
00405     case LLVMContext::MD_dbg:
00406     case LLVMContext::MD_tbaa:
00407     case LLVMContext::MD_prof:
00408     case LLVMContext::MD_fpmath:
00409     case LLVMContext::MD_tbaa_struct:
00410     case LLVMContext::MD_alias_scope:
00411     case LLVMContext::MD_noalias:
00412     case LLVMContext::MD_nontemporal:
00413     case LLVMContext::MD_mem_parallel_loop_access:
00414       // All of these directly apply.
00415       NewStore->setMetadata(ID, N);
00416       break;
00417 
00418     case LLVMContext::MD_invariant_load:
00419     case LLVMContext::MD_nonnull:
00420     case LLVMContext::MD_range:
00421       // These don't apply for stores.
00422       break;
00423     }
00424   }
00425 
00426   return NewStore;
00427 }
00428 
00429 /// \brief Combine loads to match the type of their uses' value after looking
00430 /// through intervening bitcasts.
00431 ///
00432 /// The core idea here is that if the result of a load is used in an operation,
00433 /// we should load the type most conducive to that operation. For example, when
00434 /// loading an integer and converting that immediately to a pointer, we should
00435 /// instead directly load a pointer.
00436 ///
00437 /// However, this routine must never change the width of a load or the number of
00438 /// loads as that would introduce a semantic change. This combine is expected to
00439 /// be a semantic no-op which just allows loads to more closely model the types
00440 /// of their consuming operations.
00441 ///
00442 /// Currently, we also refuse to change the precise type used for an atomic load
00443 /// or a volatile load. This is debatable, and might be reasonable to change
00444 /// later. However, it is risky in case some backend or other part of LLVM is
00445 /// relying on the exact type loaded to select appropriate atomic operations.
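      ///
      /// For example (illustrative IR, hypothetical names), a load whose only use is
      /// a bitcast of the loaded value:
      ///   %x = load i32* %p
      ///   %f = bitcast i32 %x to float
      /// is rewritten to load the used type directly through a casted pointer:
      ///   %p.cast = bitcast i32* %p to float*
      ///   %f = load float* %p.cast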
00446 static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
00447   // FIXME: We could probably with some care handle both volatile and atomic
00448   // loads here but it isn't clear that this is important.
00449   if (!LI.isSimple())
00450     return nullptr;
00451 
00452   if (LI.use_empty())
00453     return nullptr;
00454 
00455   Type *Ty = LI.getType();
00456   const DataLayout &DL = IC.getDataLayout();
00457 
00458   // Try to canonicalize loads which are only ever stored to operate over
00459   // integers instead of any other type. We only do this when the loaded type
00460   // is sized and has a size exactly the same as its store size and the store
00461   // size is a legal integer type.
00462   if (!Ty->isIntegerTy() && Ty->isSized() &&
00463       DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
00464       DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
00465     if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
00466           auto *SI = dyn_cast<StoreInst>(U);
00467           return SI && SI->getPointerOperand() != &LI;
00468         })) {
00469       LoadInst *NewLoad = combineLoadToNewType(
00470           IC, LI,
00471           Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
00472       // Replace all the stores with stores of the newly loaded value.
00473       for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
00474         auto *SI = cast<StoreInst>(*UI++);
00475         IC.Builder->SetInsertPoint(SI);
00476         combineStoreToNewValue(IC, *SI, NewLoad);
00477         IC.EraseInstFromFunction(*SI);
00478       }
00479       assert(LI.use_empty() && "Failed to remove all users of the load!");
00480       // Return the old load so the combiner can delete it safely.
00481       return &LI;
00482     }
00483   }
00484 
00485   // Fold away bit casts of the loaded value by loading the desired type.
00486   if (LI.hasOneUse())
00487     if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) {
00488       LoadInst *NewLoad = combineLoadToNewType(IC, LI, BC->getDestTy());
00489       BC->replaceAllUsesWith(NewLoad);
00490       IC.EraseInstFromFunction(*BC);
00491       return &LI;
00492     }
00493 
00494   // FIXME: We should also canonicalize loads of vectors when their elements are
00495   // cast to other types.
00496   return nullptr;
00497 }
00498 
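      // Unpack loads of single-element aggregates (structs or arrays) into a load of
      // the sole element type, rebuilding the aggregate with an insertvalue.  For
      // example (illustrative IR, hypothetical names), %v = load {i32}* %p becomes
      // roughly:
      //   %v.unpack = load i32* %p.cast
      //   %v = insertvalue {i32} undef, i32 %v.unpack, 0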
00499 static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
00500   // FIXME: We could probably with some care handle both volatile and atomic
00501   // loads here but it isn't clear that this is important.
00502   if (!LI.isSimple())
00503     return nullptr;
00504 
00505   Type *T = LI.getType();
00506   if (!T->isAggregateType())
00507     return nullptr;
00508 
00509   assert(LI.getAlignment() && "Alignment must be set at this point");
00510 
00511   if (auto *ST = dyn_cast<StructType>(T)) {
00512     // If the struct has only one element, we unpack.
00513     if (ST->getNumElements() == 1) {
00514       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
00515                                                ".unpack");
00516       return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
00517         UndefValue::get(T), NewLoad, 0, LI.getName()));
00518     }
00519   }
00520 
00521   if (auto *AT = dyn_cast<ArrayType>(T)) {
00522     // If the array has only one element, we unpack.
00523     if (AT->getNumElements() == 1) {
00524       LoadInst *NewLoad = combineLoadToNewType(IC, LI, AT->getElementType(),
00525                                                ".unpack");
00526       return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
00527         UndefValue::get(T), NewLoad, 0, LI.getName()));
00528     }
00529   }
00530 
00531   return nullptr;
00532 }
00533 
00534 // If we can determine that all possible objects pointed to by the provided
00535 // pointer value are, not only dereferenceable, but also definitively less than
00536 // or equal to the provided maximum size, then return true. Otherwise, return
00537 // false (currently only constant globals and allocas can be proven this way).
00538 //
00539 // FIXME: This should probably live in ValueTracking (or similar).
00540 static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
00541                                      const DataLayout &DL) {
00542   SmallPtrSet<Value *, 4> Visited;
00543   SmallVector<Value *, 4> Worklist(1, V);
00544 
00545   do {
00546     Value *P = Worklist.pop_back_val();
00547     P = P->stripPointerCasts();
00548 
00549     if (!Visited.insert(P).second)
00550       continue;
00551 
00552     if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
00553       Worklist.push_back(SI->getTrueValue());
00554       Worklist.push_back(SI->getFalseValue());
00555       continue;
00556     }
00557 
00558     if (PHINode *PN = dyn_cast<PHINode>(P)) {
00559       for (Value *IncValue : PN->incoming_values())
00560         Worklist.push_back(IncValue);
00561       continue;
00562     }
00563 
00564     if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
00565       if (GA->mayBeOverridden())
00566         return false;
00567       Worklist.push_back(GA->getAliasee());
00568       continue;
00569     }
00570 
00571     // If we know how big this object is, and it is less than MaxSize, continue
00572     // searching. Otherwise, return false.
00573     if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
00574       if (!AI->getAllocatedType()->isSized())
00575         return false;
00576 
00577       ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
00578       if (!CS)
00579         return false;
00580 
00581       uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
00582       // Make sure that, even if the multiplication below would wrap as an
00583       // uint64_t, we still do the right thing.
00584       if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
00585         return false;
00586       continue;
00587     }
00588 
00589     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
00590       if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
00591         return false;
00592 
00593       uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType());
00594       if (InitSize > MaxSize)
00595         return false;
00596       continue;
00597     }
00598 
00599     return false;
00600   } while (!Worklist.empty());
00601 
00602   return true;
00603 }
00604 
00605 // If we're indexing into an object of a known size, and the outer index is
00606 // not a constant, but having any value but zero would lead to undefined
00607 // behavior, replace it with zero.
00608 //
00609 // For example, if we have:
00610 // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
00611 // ...
00612 // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
00613 // ... = load i32* %arrayidx, align 4
00614 // Then we know that we can replace %x in the GEP with i64 0.
00615 //
00616 // FIXME: We could fold any GEP index to zero that would cause UB if it were
00617 // not zero. Currently, we only handle the first such index. We could also
00618 // search through non-zero constant indices if we kept track of the
00619 // offsets those indices implied.
00620 static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
00621                                      Instruction *MemI, unsigned &Idx) {
00622   if (GEPI->getNumOperands() < 2)
00623     return false;
00624 
00625   // Find the first non-zero index of a GEP. If all indices are zero, return
00626   // one past the last index.
00627   auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
00628     unsigned I = 1;
00629     for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
00630       Value *V = GEPI->getOperand(I);
00631       if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
00632         if (CI->isZero())
00633           continue;
00634 
00635       break;
00636     }
00637 
00638     return I;
00639   };
00640 
00641   // Skip through initial 'zero' indices, and find the corresponding pointer
00642   // type. See if the next index is not a constant.
00643   Idx = FirstNZIdx(GEPI);
00644   if (Idx == GEPI->getNumOperands())
00645     return false;
00646   if (isa<Constant>(GEPI->getOperand(Idx)))
00647     return false;
00648 
00649   SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
00650   Type *AllocTy = GetElementPtrInst::getIndexedType(
00651       cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType())
00652           ->getElementType(),
00653       Ops);
00654   if (!AllocTy || !AllocTy->isSized())
00655     return false;
00656   const DataLayout &DL = IC.getDataLayout();
00657   uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
00658 
00659   // If there are more indices after the one we might replace with a zero, make
00660   // sure they're all non-negative. If any of them are negative, the overall
00661   // address being computed might be before the base address determined by the
00662   // first non-zero index.
00663   auto IsAllNonNegative = [&]() {
00664     for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
00665       bool KnownNonNegative, KnownNegative;
00666       IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
00667                         KnownNegative, 0, MemI);
00668       if (KnownNonNegative)
00669         continue;
00670       return false;
00671     }
00672 
00673     return true;
00674   };
00675 
00676   // FIXME: If the GEP is not inbounds, and there are extra indices after the
00677   // one we'll replace, those could cause the address computation to wrap
00678   // (rendering the IsAllNonNegative() check below insufficient). We can do
00679 // better, ignoring zero indices (and other indices we can prove small
00680   // enough not to wrap).
00681   if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
00682     return false;
00683 
00684   // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
00685   // also known to be dereferenceable.
00686   return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
00687          IsAllNonNegative();
00688 }
00689 
00690 // If we're indexing into an object with a variable index for the memory
00691 // access, but the object has only one element, we can assume that the index
00692 // will always be zero. If we replace the GEP, return it.
00693 template <typename T>
00694 static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
00695                                           T &MemI) {
00696   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
00697     unsigned Idx;
00698     if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
00699       Instruction *NewGEPI = GEPI->clone();
00700       NewGEPI->setOperand(Idx,
00701         ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
00702       NewGEPI->insertBefore(GEPI);
00703       MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
00704       return NewGEPI;
00705     }
00706   }
00707 
00708   return nullptr;
00709 }
00710 
00711 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
00712   Value *Op = LI.getOperand(0);
00713 
00714   // Try to canonicalize the loaded type.
00715   if (Instruction *Res = combineLoadToOperationType(*this, LI))
00716     return Res;
00717 
00718   // Attempt to improve the alignment.
00719   unsigned KnownAlign = getOrEnforceKnownAlignment(
00720       Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
00721   unsigned LoadAlign = LI.getAlignment();
00722   unsigned EffectiveLoadAlign =
00723       LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
00724 
00725   if (KnownAlign > EffectiveLoadAlign)
00726     LI.setAlignment(KnownAlign);
00727   else if (LoadAlign == 0)
00728     LI.setAlignment(EffectiveLoadAlign);
00729 
00730   // Replace GEP indices if possible.
00731   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
00732       Worklist.Add(NewGEPI);
00733       return &LI;
00734   }
00735 
00736   // None of the following transforms are legal for volatile/atomic loads.
00737   // FIXME: Some of it is okay for atomic loads; needs refactoring.
00738   if (!LI.isSimple()) return nullptr;
00739 
00740   if (Instruction *Res = unpackLoadToAggregate(*this, LI))
00741     return Res;
00742 
00743   // Do really simple store-to-load forwarding and load CSE, to catch cases
00744   // where there are several consecutive memory accesses to the same location,
00745   // separated by a few arithmetic operations.
00746   BasicBlock::iterator BBI = &LI;
00747   if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
00748     return ReplaceInstUsesWith(
00749         LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
00750                                             LI.getName() + ".cast"));
00751 
00752   // load(gep null, ...) -> unreachable
00753   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
00754     const Value *GEPI0 = GEPI->getOperand(0);
00755     // TODO: Consider a target hook for valid address spaces for this xform.
00756     if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
00757       // Insert a new store to null instruction before the load to indicate
00758       // that this code is not reachable.  We do this instead of inserting
00759       // an unreachable instruction directly because we cannot modify the
00760       // CFG.
00761       new StoreInst(UndefValue::get(LI.getType()),
00762                     Constant::getNullValue(Op->getType()), &LI);
00763       return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
00764     }
00765   }
00766 
00767   // load null/undef -> unreachable
00768   // TODO: Consider a target hook for valid address spaces for this xform.
00769   if (isa<UndefValue>(Op) ||
00770       (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
00771     // Insert a new store to null instruction before the load to indicate that
00772     // this code is not reachable.  We do this instead of inserting an
00773     // unreachable instruction directly because we cannot modify the CFG.
00774     new StoreInst(UndefValue::get(LI.getType()),
00775                   Constant::getNullValue(Op->getType()), &LI);
00776     return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
00777   }
00778 
00779   if (Op->hasOneUse()) {
00780     // Change select and PHI nodes to select values instead of addresses: this
00781     // helps alias analysis out a lot, allows many other simplifications, and
00782     // exposes redundancy in the code.
00783     //
00784     // Note that we cannot do the transformation unless we know that the
00785     // introduced loads cannot trap!  Something like this is valid as long as
00786     // the condition is always false: load (select bool %C, int* null, int* %G),
00787     // but it would not be valid if we transformed it to load from null
00788     // unconditionally.
00789     //
00790     if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
00791       // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
00792       unsigned Align = LI.getAlignment();
00793       if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
00794           isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
00795         LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
00796                                            SI->getOperand(1)->getName()+".val");
00797         LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
00798                                            SI->getOperand(2)->getName()+".val");
00799         V1->setAlignment(Align);
00800         V2->setAlignment(Align);
00801         return SelectInst::Create(SI->getCondition(), V1, V2);
00802       }
00803 
00804       // load (select (cond, null, P)) -> load P
00805       if (isa<ConstantPointerNull>(SI->getOperand(1)) && 
00806           LI.getPointerAddressSpace() == 0) {
00807         LI.setOperand(0, SI->getOperand(2));
00808         return &LI;
00809       }
00810 
00811       // load (select (cond, P, null)) -> load P
00812       if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
00813           LI.getPointerAddressSpace() == 0) {
00814         LI.setOperand(0, SI->getOperand(1));
00815         return &LI;
00816       }
00817     }
00818   }
00819   return nullptr;
00820 }
00821 
00822 /// \brief Combine stores to match the type of value being stored.
00823 ///
00824 /// The core idea here is that the memory does not have any intrinsic type and
00825 /// where we can we should match the type of a store to the type of value being
00826 /// stored.
00827 ///
00828 /// However, this routine must never change the width of a store or the number of
00829 /// stores as that would introduce a semantic change. This combine is expected to
00830 /// be a semantic no-op which just allows stores to more closely model the types
00831 /// of their incoming values.
00832 ///
00833 /// Currently, we also refuse to change the precise type used for an atomic or
00834 /// volatile store. This is debatable, and might be reasonable to change later.
00835 /// However, it is risky in case some backend or other part of LLVM is relying
00836 /// on the exact type stored to select appropriate atomic operations.
00837 ///
00838 /// \returns true if the store was successfully combined away. This indicates
00839 /// the caller must erase the store instruction. We have to let the caller erase
00840 /// the store instruction, as otherwise there is no way to signal whether it was
00841 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
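      ///
      /// For example (illustrative IR, hypothetical names), a store of a bitcast:
      ///   %b = bitcast float %f to i32
      ///   store i32 %b, i32* %p
      /// becomes a store of the original value through a casted pointer:
      ///   %p.cast = bitcast i32* %p to float*
      ///   store float %f, float* %p.cast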
00842 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
00843   // FIXME: We could probably with some care handle both volatile and atomic
00844   // stores here but it isn't clear that this is important.
00845   if (!SI.isSimple())
00846     return false;
00847 
00848   Value *V = SI.getValueOperand();
00849 
00850   // Fold away bit casts of the stored value by storing the original type.
00851   if (auto *BC = dyn_cast<BitCastInst>(V)) {
00852     V = BC->getOperand(0);
00853     combineStoreToNewValue(IC, SI, V);
00854     return true;
00855   }
00856 
00857   // FIXME: We should also canonicalize loads of vectors when their elements are
00858   // FIXME: We should also canonicalize stores of vectors when their elements are
00859   return false;
00860 }
00861 
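      // Unpack stores of single-element aggregates (structs or arrays): extract the
      // sole element and store it directly.  For example (illustrative IR,
      // hypothetical names), store {i32} %agg, {i32}* %p becomes roughly:
      //   %e = extractvalue {i32} %agg, 0
      //   store i32 %e, i32* %p.cast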
00862 static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
00863   // FIXME: We could probably with some care handle both volatile and atomic
00864   // stores here but it isn't clear that this is important.
00865   if (!SI.isSimple())
00866     return false;
00867 
00868   Value *V = SI.getValueOperand();
00869   Type *T = V->getType();
00870 
00871   if (!T->isAggregateType())
00872     return false;
00873 
00874   if (auto *ST = dyn_cast<StructType>(T)) {
00875     // If the struct has only one element, we unpack.
00876     if (ST->getNumElements() == 1) {
00877       V = IC.Builder->CreateExtractValue(V, 0);
00878       combineStoreToNewValue(IC, SI, V);
00879       return true;
00880     }
00881   }
00882 
00883   if (auto *AT = dyn_cast<ArrayType>(T)) {
00884     // If the array has only one element, we unpack.
00885     if (AT->getNumElements() == 1) {
00886       V = IC.Builder->CreateExtractValue(V, 0);
00887       combineStoreToNewValue(IC, SI, V);
00888       return true;
00889     }
00890   }
00891 
00892   return false;
00893 }
00894 
00895 /// equivalentAddressValues - Test if A and B will obviously have the same
00896 /// value. This includes recognizing that %t0 and %t1 will have the same
00897 /// value in code like this:
00898 ///   %t0 = getelementptr \@a, 0, 3
00899 ///   store i32 0, i32* %t0
00900 ///   %t1 = getelementptr \@a, 0, 3
00901 ///   %t2 = load i32* %t1
00902 ///
00903 static bool equivalentAddressValues(Value *A, Value *B) {
00904   // Test if the values are trivially equivalent.
00905   if (A == B) return true;
00906 
00907   // Test if the values come from identical arithmetic instructions.
00908   // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
00909   // it's only used to compare two uses within the same basic block, which
00910   // means that they'll always either have the same value or one of them
00911   // will have an undefined value.
00912   if (isa<BinaryOperator>(A) ||
00913       isa<CastInst>(A) ||
00914       isa<PHINode>(A) ||
00915       isa<GetElementPtrInst>(A))
00916     if (Instruction *BI = dyn_cast<Instruction>(B))
00917       if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
00918         return true;
00919 
00920   // Otherwise they may not be equivalent.
00921   return false;
00922 }
00923 
00924 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
00925   Value *Val = SI.getOperand(0);
00926   Value *Ptr = SI.getOperand(1);
00927 
00928   // Try to canonicalize the stored type.
00929   if (combineStoreToValueType(*this, SI))
00930     return EraseInstFromFunction(SI);
00931 
00932   // Attempt to improve the alignment.
00933   unsigned KnownAlign = getOrEnforceKnownAlignment(
00934       Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
00935   unsigned StoreAlign = SI.getAlignment();
00936   unsigned EffectiveStoreAlign =
00937       StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
00938 
00939   if (KnownAlign > EffectiveStoreAlign)
00940     SI.setAlignment(KnownAlign);
00941   else if (StoreAlign == 0)
00942     SI.setAlignment(EffectiveStoreAlign);
00943 
00944   // Try to unpack single-element aggregate stores.
00945   if (unpackStoreToAggregate(*this, SI))
00946     return EraseInstFromFunction(SI);
00947 
00948   // Replace GEP indices if possible.
00949   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
00950       Worklist.Add(NewGEPI);
00951       return &SI;
00952   }
00953 
00954   // Don't hack volatile/atomic stores.
00955   // FIXME: Some bits are legal for atomic stores; needs refactoring.
00956   if (!SI.isSimple()) return nullptr;
00957 
00958   // If the RHS is an alloca with a single use, zapify the store, making the
00959   // alloca dead.
00960   if (Ptr->hasOneUse()) {
00961     if (isa<AllocaInst>(Ptr))
00962       return EraseInstFromFunction(SI);
00963     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
00964       if (isa<AllocaInst>(GEP->getOperand(0))) {
00965         if (GEP->getOperand(0)->hasOneUse())
00966           return EraseInstFromFunction(SI);
00967       }
00968     }
00969   }
00970 
00971   // Do really simple DSE, to catch cases where there are several consecutive
00972   // stores to the same location, separated by a few arithmetic operations. This
00973   // situation often occurs with bitfield accesses.
00974   BasicBlock::iterator BBI = &SI;
00975   for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
00976        --ScanInsts) {
00977     --BBI;
00978     // Don't count debug info directives, lest they affect codegen; also skip
00979     // pointer-to-pointer bitcasts, which are NOPs.
00980     if (isa<DbgInfoIntrinsic>(BBI) ||
00981         (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
00982       ScanInsts++;
00983       continue;
00984     }
00985 
00986     if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
00987       // Prev store isn't volatile, and stores to the same location?
00988       if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
00989                                                         SI.getOperand(1))) {
00990         ++NumDeadStore;
00991         ++BBI;
00992         EraseInstFromFunction(*PrevSI);
00993         continue;
00994       }
00995       break;
00996     }
00997 
00998     // If this is a load, we have to stop.  However, if the loaded value is the
00999     // value we're storing and it is loaded from the pointer we're storing to,
01000     // then *this* store is dead (X = load P; store X -> P).
01001     if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
01002       if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
01003           LI->isSimple())
01004         return EraseInstFromFunction(SI);
01005 
01006       // Otherwise, this is a load from some other location.  Stores before it
01007       // may not be dead.
01008       break;
01009     }
01010 
01011     // Don't skip over loads or things that can modify memory.
01012     if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
01013       break;
01014   }
01015 
01016   // store X, null    -> turns into 'unreachable' in SimplifyCFG
01017   if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
01018     if (!isa<UndefValue>(Val)) {
01019       SI.setOperand(0, UndefValue::get(Val->getType()));
01020       if (Instruction *U = dyn_cast<Instruction>(Val))
01021         Worklist.Add(U);  // Dropped a use.
01022     }
01023     return nullptr;  // Do not modify these!
01024   }
01025 
01026   // store undef, Ptr -> noop
01027   if (isa<UndefValue>(Val))
01028     return EraseInstFromFunction(SI);
01029 
01030   // If this store is the last instruction in the basic block (possibly
01031   // excepting debug info instructions), and if the block ends with an
01032   // unconditional branch, try to move it to the successor block.
01033   BBI = &SI;
01034   do {
01035     ++BBI;
01036   } while (isa<DbgInfoIntrinsic>(BBI) ||
01037            (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
01038   if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
01039     if (BI->isUnconditional())
01040       if (SimplifyStoreAtEndOfBlock(SI))
01041         return nullptr;  // xform done!
01042 
01043   return nullptr;
01044 }
01045 
01046 /// SimplifyStoreAtEndOfBlock - Turn things like:
01047 ///   if () { *P = v1; } else { *P = v2 }
01048 /// into a phi node with a store in the successor.
01049 ///
01050 /// Simplify things like:
01051 ///   *P = v1; if () { *P = v2; }
01052 /// into a phi node with a store in the successor.
01053 ///
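      /// In IR terms (illustrative; value names other than %storemerge are
      /// hypothetical), both forms end with a single store in the successor block,
      /// fed by a phi:
      ///   %storemerge = phi i32 [ %v1, %StoreBB ], [ %v2, %OtherBB ]
      ///   store i32 %storemerge, i32* %P
      ///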
01054 bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
01055   BasicBlock *StoreBB = SI.getParent();
01056 
01057   // Check to see if the successor block has exactly two incoming edges.  If
01058   // so, see if the other predecessor contains a store to the same location.
01059   // If so, insert a PHI node (if needed) and move the stores down.
01060   BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
01061 
01062   // Determine whether Dest has exactly two predecessors and, if so, compute
01063   // the other predecessor.
01064   pred_iterator PI = pred_begin(DestBB);
01065   BasicBlock *P = *PI;
01066   BasicBlock *OtherBB = nullptr;
01067 
01068   if (P != StoreBB)
01069     OtherBB = P;
01070 
01071   if (++PI == pred_end(DestBB))
01072     return false;
01073 
01074   P = *PI;
01075   if (P != StoreBB) {
01076     if (OtherBB)
01077       return false;
01078     OtherBB = P;
01079   }
01080   if (++PI != pred_end(DestBB))
01081     return false;
01082 
01083   // Bail out if all the relevant blocks aren't distinct (this can happen,
01084   // for example, if SI is in an infinite loop)
01085   if (StoreBB == DestBB || OtherBB == DestBB)
01086     return false;
01087 
01088   // Verify that the other block ends in a branch and is not otherwise empty.
01089   BasicBlock::iterator BBI = OtherBB->getTerminator();
01090   BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
01091   if (!OtherBr || BBI == OtherBB->begin())
01092     return false;
01093 
01094   // If the other block ends in an unconditional branch, check for the 'if then
01095   // else' case: there is an instruction before the branch.
01096   StoreInst *OtherStore = nullptr;
01097   if (OtherBr->isUnconditional()) {
01098     --BBI;
01099     // Skip over debugging info.
01100     while (isa<DbgInfoIntrinsic>(BBI) ||
01101            (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
01102       if (BBI==OtherBB->begin())
01103         return false;
01104       --BBI;
01105     }
01106     // If this isn't a store, isn't a store to the same location, or is not the
01107     // right kind of store, bail out.
01108     OtherStore = dyn_cast<StoreInst>(BBI);
01109     if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
01110         !SI.isSameOperationAs(OtherStore))
01111       return false;
01112   } else {
01113     // Otherwise, the other block ended with a conditional branch. If one of the
01114     // destinations is StoreBB, then we have the if/then case.
01115     if (OtherBr->getSuccessor(0) != StoreBB &&
01116         OtherBr->getSuccessor(1) != StoreBB)
01117       return false;
01118 
01119     // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
01120     // if/then triangle.  See if there is a store to the same ptr as SI that
01121     // lives in OtherBB.
01122     for (;; --BBI) {
01123       // Check to see if we find the matching store.
01124       if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
01125         if (OtherStore->getOperand(1) != SI.getOperand(1) ||
01126             !SI.isSameOperationAs(OtherStore))
01127           return false;
01128         break;
01129       }
01130       // If we find something that may be using or overwriting the stored
01131       // value, or if we run out of instructions, we can't do the xform.
01132       if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
01133           BBI == OtherBB->begin())
01134         return false;
01135     }
01136 
01137     // In order to eliminate the store in OtherBr, we have to
01138     // make sure nothing reads or overwrites the stored value in
01139     // StoreBB.
01140     for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
01141       // FIXME: This should really be AA driven.
01142       if (I->mayReadFromMemory() || I->mayWriteToMemory())
01143         return false;
01144     }
01145   }
01146 
01147   // Insert a PHI node now if we need it.
01148   Value *MergedVal = OtherStore->getOperand(0);
01149   if (MergedVal != SI.getOperand(0)) {
01150     PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
01151     PN->addIncoming(SI.getOperand(0), SI.getParent());
01152     PN->addIncoming(OtherStore->getOperand(0), OtherBB);
01153     MergedVal = InsertNewInstBefore(PN, DestBB->front());
01154   }
01155 
01156   // Advance to a place where it is safe to insert the new store and
01157   // insert it.
01158   BBI = DestBB->getFirstInsertionPt();
01159   StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
01160                                    SI.isVolatile(),
01161                                    SI.getAlignment(),
01162                                    SI.getOrdering(),
01163                                    SI.getSynchScope());
01164   InsertNewInstBefore(NewSI, *BBI);
01165   NewSI->setDebugLoc(OtherStore->getDebugLoc());
01166 
01167   // If the two stores had AA tags, merge them.
01168   AAMDNodes AATags;
01169   SI.getAAMetadata(AATags);
01170   if (AATags) {
01171     OtherStore->getAAMetadata(AATags, /* Merge = */ true);
01172     NewSI->setAAMetadata(AATags);
01173   }
01174 
01175   // Nuke the old stores.
01176   EraseInstFromFunction(SI);
01177   EraseInstFromFunction(*OtherStore);
01178   return true;
01179 }