InstCombineLoadStoreAlloca.cpp
00001 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the visit functions for load, store and alloca.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "InstCombineInternal.h"
00015 #include "llvm/ADT/Statistic.h"
00016 #include "llvm/Analysis/Loads.h"
00017 #include "llvm/IR/DataLayout.h"
00018 #include "llvm/IR/LLVMContext.h"
00019 #include "llvm/IR/IntrinsicInst.h"
00020 #include "llvm/IR/MDBuilder.h"
00021 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
00022 #include "llvm/Transforms/Utils/Local.h"
00023 using namespace llvm;
00024 
00025 #define DEBUG_TYPE "instcombine"
00026 
00027 STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
00028 STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
00029 
00030 /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
00031 /// some part of a constant global variable.  This intentionally only accepts
00032 /// constant expressions because we can't rewrite arbitrary instructions.
00033 static bool pointsToConstantGlobal(Value *V) {
00034   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
00035     return GV->isConstant();
00036 
00037   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
00038     if (CE->getOpcode() == Instruction::BitCast ||
00039         CE->getOpcode() == Instruction::AddrSpaceCast ||
00040         CE->getOpcode() == Instruction::GetElementPtr)
00041       return pointsToConstantGlobal(CE->getOperand(0));
00042   }
00043   return false;
00044 }
00045 
00046 /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
00047 /// pointer to an alloca.  Ignore any reads of the pointer; return false if we
00048 /// see any stores or other unknown uses.  If we see pointer arithmetic, keep
00049 /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
00050 /// the uses.  If we see a memcpy/memmove that targets a non-offset pointer to
00051 /// the alloca, and if the source pointer is a pointer to a constant global, we
00052 /// can optimize this.
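      /// For example (illustrative, roughly what clang emits for a local constant
      /// array that is only read): an alloca whose contents are filled in by a
      /// single memcpy from a constant global @g and are afterwards only loaded
      /// can have all of its users rewritten to read from @g directly.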
00053 static bool
00054 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
00055                                SmallVectorImpl<Instruction *> &ToDelete) {
00056   // We track lifetime intrinsics as we encounter them.  If we decide to go
00057   // ahead and replace the value with the global, this lets the caller quickly
00058   // eliminate the markers.
00059 
00060   SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
00061   ValuesToInspect.push_back(std::make_pair(V, false));
00062   while (!ValuesToInspect.empty()) {
00063     auto ValuePair = ValuesToInspect.pop_back_val();
00064     const bool IsOffset = ValuePair.second;
00065     for (auto &U : ValuePair.first->uses()) {
00066       Instruction *I = cast<Instruction>(U.getUser());
00067 
00068       if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
00069         // Ignore non-volatile loads; they are always ok.
00070         if (!LI->isSimple()) return false;
00071         continue;
00072       }
00073 
00074       if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
00075         // If uses of the bitcast are ok, we are ok.
00076         ValuesToInspect.push_back(std::make_pair(I, IsOffset));
00077         continue;
00078       }
00079       if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
00080         // If the GEP has all zero indices, it doesn't offset the pointer; if it
00081         // has any non-zero index, it does.
00082         ValuesToInspect.push_back(
00083             std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
00084         continue;
00085       }
00086 
00087       if (CallSite CS = I) {
00088         // If this is the function being called then we treat it like a load and
00089         // ignore it.
00090         if (CS.isCallee(&U))
00091           continue;
00092 
00093         // Inalloca arguments are clobbered by the call.
00094         unsigned ArgNo = CS.getArgumentNo(&U);
00095         if (CS.isInAllocaArgument(ArgNo))
00096           return false;
00097 
00098         // If this is a readonly/readnone call site, then we know it is just a
00099         // load (but one that potentially returns the value itself), so we can
00100         // ignore it if we know that the value isn't captured.
00101         if (CS.onlyReadsMemory() &&
00102             (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
00103           continue;
00104 
00105         // If this is being passed as a byval argument, the caller is making a
00106         // copy, so it is only a read of the alloca.
00107         if (CS.isByValArgument(ArgNo))
00108           continue;
00109       }
00110 
00111       // Lifetime intrinsics can be handled by the caller.
00112       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
00113         if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
00114             II->getIntrinsicID() == Intrinsic::lifetime_end) {
00115           assert(II->use_empty() && "Lifetime markers have no result to use!");
00116           ToDelete.push_back(II);
00117           continue;
00118         }
00119       }
00120 
00121       // If this isn't our memcpy/memmove, reject it as something we can't
00122       // handle.
00123       MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
00124       if (!MI)
00125         return false;
00126 
00127       // If the transfer is using the alloca as its source, then ignore it since
00128       // it is just a load (unless the transfer is volatile).
00129       if (U.getOperandNo() == 1) {
00130         if (MI->isVolatile()) return false;
00131         continue;
00132       }
00133 
00134       // If we have already seen a copy, reject the second one.
00135       if (TheCopy) return false;
00136 
00137       // If the pointer has been offset from the start of the alloca, we can't
00138       // safely handle this.
00139       if (IsOffset) return false;
00140 
00141       // If the memintrinsic isn't using the alloca as the dest, reject it.
00142       if (U.getOperandNo() != 0) return false;
00143 
00144       // If the source of the memcpy/move is not a constant global, reject it.
00145       if (!pointsToConstantGlobal(MI->getSource()))
00146         return false;
00147 
00148       // Otherwise, the transform is safe.  Remember the copy instruction.
00149       TheCopy = MI;
00150     }
00151   }
00152   return true;
00153 }
00154 
00155 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
00156 /// modified by a copy from a constant global.  If we can prove this, we can
00157 /// replace any uses of the alloca with uses of the global directly.
00158 static MemTransferInst *
00159 isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
00160                                SmallVectorImpl<Instruction *> &ToDelete) {
00161   MemTransferInst *TheCopy = nullptr;
00162   if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
00163     return TheCopy;
00164   return nullptr;
00165 }
00166 
00167 static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
00168   // Check for array size of 1 (scalar allocation).
00169   if (!AI.isArrayAllocation()) {
00170     // i32 1 is the canonical array size for scalar allocations.
00171     if (AI.getArraySize()->getType()->isIntegerTy(32))
00172       return nullptr;
00173 
00174     // Canonicalize it.
00175     Value *V = IC.Builder->getInt32(1);
00176     AI.setOperand(0, V);
00177     return &AI;
00178   }
00179 
00180   // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
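        // e.g. (illustrative):   %V = alloca i32, i32 4
        //   becomes roughly:     %V1 = alloca [4 x i32]
        //                        %V1.sub = getelementptr inbounds %V1, 0, 0
        //   with every user of the original alloca rewritten to use %V1.sub.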
00181   if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
00182     Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
00183     AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
00184     New->setAlignment(AI.getAlignment());
00185 
00186     // Scan to the end of the allocation instructions, to skip over a block of
00187     // allocas if possible...also skip interleaved debug info
00188     //
00189     BasicBlock::iterator It = New;
00190     while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
00191       ++It;
00192 
00193     // Now that It points to the first non-allocation instruction in the block,
00194     // insert our getelementptr instruction...
00195     //
00196     Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
00197     Value *NullIdx = Constant::getNullValue(IdxTy);
00198     Value *Idx[2] = {NullIdx, NullIdx};
00199     Instruction *GEP =
00200         GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
00201     IC.InsertNewInstBefore(GEP, *It);
00202 
00203     // Now make everything use the getelementptr instead of the original
00204     // allocation.
00205     return IC.ReplaceInstUsesWith(AI, GEP);
00206   }
00207 
00208   if (isa<UndefValue>(AI.getArraySize()))
00209     return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
00210 
00211   // Ensure that the alloca array size argument has type intptr_t, so that
00212   // any casting is exposed early.
00213   Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
00214   if (AI.getArraySize()->getType() != IntPtrTy) {
00215     Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
00216     AI.setOperand(0, V);
00217     return &AI;
00218   }
00219 
00220   return nullptr;
00221 }
00222 
00223 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
00224   if (auto *I = simplifyAllocaArraySize(*this, AI))
00225     return I;
00226 
00227   if (AI.getAllocatedType()->isSized()) {
00228     // If the alignment is 0 (unspecified), assign it the preferred alignment.
00229     if (AI.getAlignment() == 0)
00230       AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
00231 
00232     // Move all allocas of zero-byte objects to the entry block and merge them
00233     // together.  Note that we only do this for allocas, because malloc should
00234     // allocate and return a unique pointer, even for a zero-byte allocation.
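          // (A zero-byte object here is e.g. an empty struct or a zero-length
          // array, so the alloca yields a valid address but no storage.)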
00235     if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
00236       // For a zero-sized alloca there is no point in doing an array allocation.
00237       // This is helpful if the array size is a complicated expression not used
00238       // elsewhere.
00239       if (AI.isArrayAllocation()) {
00240         AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
00241         return &AI;
00242       }
00243 
00244       // Get the first instruction in the entry block.
00245       BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
00246       Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
00247       if (FirstInst != &AI) {
00248         // If the entry block doesn't start with a zero-size alloca then move
00249         // this one to the start of the entry block.  There is no problem with
00250         // dominance as the array size was forced to a constant earlier already.
00251         AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
00252         if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
00253             DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
00254           AI.moveBefore(FirstInst);
00255           return &AI;
00256         }
00257 
00258         // If the alignment of the entry block alloca is 0 (unspecified),
00259         // assign it the preferred alignment.
00260         if (EntryAI->getAlignment() == 0)
00261           EntryAI->setAlignment(
00262               DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
00263         // Replace this zero-sized alloca with the one at the start of the entry
00264         // block after ensuring that the address will be aligned enough for both
00265         // types.
00266         unsigned MaxAlign = std::max(EntryAI->getAlignment(),
00267                                      AI.getAlignment());
00268         EntryAI->setAlignment(MaxAlign);
00269         if (AI.getType() != EntryAI->getType())
00270           return new BitCastInst(EntryAI, AI.getType());
00271         return ReplaceInstUsesWith(AI, EntryAI);
00272       }
00273     }
00274   }
00275 
00276   if (AI.getAlignment()) {
00277     // Check to see if this allocation is only modified by a memcpy/memmove from
00278     // a constant global whose alignment is equal to or exceeds that of the
00279     // allocation.  If this is the case, we can change all users to use
00280     // the constant global instead.  This is commonly produced by the CFE by
00281     // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
00282     // is only subsequently read.
00283     SmallVector<Instruction *, 4> ToDelete;
00284     if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
00285       unsigned SourceAlign = getOrEnforceKnownAlignment(
00286           Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
00287       if (AI.getAlignment() <= SourceAlign) {
00288         DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
00289         DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
00290         for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
00291           EraseInstFromFunction(*ToDelete[i]);
00292         Constant *TheSrc = cast<Constant>(Copy->getSource());
00293         Constant *Cast
00294           = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
00295         Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
00296         EraseInstFromFunction(*Copy);
00297         ++NumGlobalCopies;
00298         return NewI;
00299       }
00300     }
00301   }
00302 
00303   // Finally, use the generic allocation site handler to aggressively remove
00304   // unused allocas.
00305   return visitAllocSite(AI);
00306 }
00307 
00308 /// \brief Helper to combine a load to a new type.
00309 ///
00310 /// This just does the work of combining a load to a new type. It handles
00311 /// metadata, etc., and returns the new instruction. The \c NewTy should be the
00312 /// loaded *value* type. This will convert it to a pointer, cast the operand to
00313 /// that pointer type, load it, etc.
00314 ///
00315 /// Note that this will create all of the instructions with whatever insert
00316 /// point the \c InstCombiner currently is using.
00317 static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy) {
00318   Value *Ptr = LI.getPointerOperand();
00319   unsigned AS = LI.getPointerAddressSpace();
00320   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
00321   LI.getAllMetadata(MD);
00322 
00323   LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
00324       IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
00325       LI.getAlignment(), LI.getName());
00326   MDBuilder MDB(NewLoad->getContext());
00327   for (const auto &MDPair : MD) {
00328     unsigned ID = MDPair.first;
00329     MDNode *N = MDPair.second;
00330     // Note, essentially every kind of metadata should be preserved here! This
00331     // routine is supposed to clone a load instruction changing *only its type*.
00332     // The only metadata it makes sense to drop is metadata which is invalidated
00333     // when the pointer type changes. This should essentially never be the case
00334     // in LLVM, but we explicitly switch over only known metadata to be
00335     // conservatively correct. If you are adding metadata to LLVM which pertains
00336     // to loads, you almost certainly want to add it here.
00337     switch (ID) {
00338     case LLVMContext::MD_dbg:
00339     case LLVMContext::MD_tbaa:
00340     case LLVMContext::MD_prof:
00341     case LLVMContext::MD_fpmath:
00342     case LLVMContext::MD_tbaa_struct:
00343     case LLVMContext::MD_invariant_load:
00344     case LLVMContext::MD_alias_scope:
00345     case LLVMContext::MD_noalias:
00346     case LLVMContext::MD_nontemporal:
00347     case LLVMContext::MD_mem_parallel_loop_access:
00348       // All of these directly apply.
00349       NewLoad->setMetadata(ID, N);
00350       break;
00351 
00352     case LLVMContext::MD_nonnull:
00353       // This only directly applies if the new type is also a pointer.
00354       if (NewTy->isPointerTy()) {
00355         NewLoad->setMetadata(ID, N);
00356         break;
00357       }
00358       // If it's integral now, translate it to !range metadata.
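            // (A !range of [1, 0) wraps around the integer space and therefore
            // means "any value except zero", the integer analogue of !nonnull.)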
00359       if (NewTy->isIntegerTy()) {
00360         auto *ITy = cast<IntegerType>(NewTy);
00361         auto *NullInt = ConstantExpr::getPtrToInt(
00362             ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
00363         auto *NonNullInt =
00364             ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
00365         NewLoad->setMetadata(LLVMContext::MD_range,
00366                              MDB.createRange(NonNullInt, NullInt));
00367       }
00368       break;
00369 
00370     case LLVMContext::MD_range:
00371       // FIXME: It would be nice to propagate this in some way, but the type
00372       // conversions make it hard. If the new type is a pointer, we could
00373       // translate it to !nonnull metadata.
00374       break;
00375     }
00376   }
00377   return NewLoad;
00378 }
00379 
00380 /// \brief Combine a store to a new type.
00381 ///
00382 /// Returns the newly created store instruction.
00383 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
00384   Value *Ptr = SI.getPointerOperand();
00385   unsigned AS = SI.getPointerAddressSpace();
00386   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
00387   SI.getAllMetadata(MD);
00388 
00389   StoreInst *NewStore = IC.Builder->CreateAlignedStore(
00390       V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
00391       SI.getAlignment());
00392   for (const auto &MDPair : MD) {
00393     unsigned ID = MDPair.first;
00394     MDNode *N = MDPair.second;
00395     // Note, essentially every kind of metadata should be preserved here! This
00396     // routine is supposed to clone a store instruction changing *only its
00397     // type*. The only metadata it makes sense to drop is metadata which is
00398     // invalidated when the pointer type changes. This should essentially
00399     // never be the case in LLVM, but we explicitly switch over only known
00400     // metadata to be conservatively correct. If you are adding metadata to
00401     // LLVM which pertains to stores, you almost certainly want to add it
00402     // here.
00403     switch (ID) {
00404     case LLVMContext::MD_dbg:
00405     case LLVMContext::MD_tbaa:
00406     case LLVMContext::MD_prof:
00407     case LLVMContext::MD_fpmath:
00408     case LLVMContext::MD_tbaa_struct:
00409     case LLVMContext::MD_alias_scope:
00410     case LLVMContext::MD_noalias:
00411     case LLVMContext::MD_nontemporal:
00412     case LLVMContext::MD_mem_parallel_loop_access:
00413       // All of these directly apply.
00414       NewStore->setMetadata(ID, N);
00415       break;
00416 
00417     case LLVMContext::MD_invariant_load:
00418     case LLVMContext::MD_nonnull:
00419     case LLVMContext::MD_range:
00420       // These don't apply for stores.
00421       break;
00422     }
00423   }
00424 
00425   return NewStore;
00426 }
00427 
00428 /// \brief Combine loads to match the type of the value their uses expect,
00429 /// looking through intervening bitcasts.
00430 ///
00431 /// The core idea here is that if the result of a load is used in an operation,
00432 /// we should load the type most conducive to that operation. For example, when
00433 /// loading an integer and converting that immediately to a pointer, we should
00434 /// instead directly load a pointer.
00435 ///
00436 /// However, this routine must never change the width of a load or the number of
00437 /// loads as that would introduce a semantic change. This combine is expected to
00438 /// be a semantic no-op which just allows loads to more closely model the types
00439 /// of their consuming operations.
00440 ///
00441 /// Currently, we also refuse to change the precise type used for an atomic load
00442 /// or a volatile load. This is debatable, and might be reasonable to change
00443 /// later. However, it is risky in case some backend or other part of LLVM is
00444 /// relying on the exact type loaded to select appropriate atomic operations.
00445 static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
00446   // FIXME: With some care we could probably handle both volatile and atomic
00447   // loads here, but it isn't clear that this is important.
00448   if (!LI.isSimple())
00449     return nullptr;
00450 
00451   if (LI.use_empty())
00452     return nullptr;
00453 
00454   Type *Ty = LI.getType();
00455   const DataLayout &DL = IC.getDataLayout();
00456 
00457   // Try to canonicalize loads which are only ever stored to operate over
00458   // integers instead of any other type. We only do this when the loaded type
00459   // is sized and has a size exactly the same as its store size and the store
00460   // size is a legal integer type.
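        // For example (illustrative): a float value that is loaded only to be
        // stored elsewhere can instead be moved as the equivalent i32 bit pattern,
        // which later passes find easier to reason about.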
00461   if (!Ty->isIntegerTy() && Ty->isSized() &&
00462       DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
00463       DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
00464     if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
00465           auto *SI = dyn_cast<StoreInst>(U);
00466           return SI && SI->getPointerOperand() != &LI;
00467         })) {
00468       LoadInst *NewLoad = combineLoadToNewType(
00469           IC, LI,
00470           Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
00471       // Replace all the stores with stores of the newly loaded value.
00472       for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
00473         auto *SI = cast<StoreInst>(*UI++);
00474         IC.Builder->SetInsertPoint(SI);
00475         combineStoreToNewValue(IC, *SI, NewLoad);
00476         IC.EraseInstFromFunction(*SI);
00477       }
00478       assert(LI.use_empty() && "Failed to remove all users of the load!");
00479       // Return the old load so the combiner can delete it safely.
00480       return &LI;
00481     }
00482   }
00483 
00484   // Fold away bit casts of the loaded value by loading the desired type.
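        // e.g. (illustrative): a load of i32 whose only user bitcasts the value to
        // float becomes a load of float through a bitcast of the pointer, and the
        // value bitcast disappears.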
00485   if (LI.hasOneUse())
00486     if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) {
00487       LoadInst *NewLoad = combineLoadToNewType(IC, LI, BC->getDestTy());
00488       BC->replaceAllUsesWith(NewLoad);
00489       IC.EraseInstFromFunction(*BC);
00490       return &LI;
00491     }
00492 
00493   // FIXME: We should also canonicalize loads of vectors when their elements are
00494   // cast to other types.
00495   return nullptr;
00496 }
00497 
00498 // If we can determine that all possible objects pointed to by the provided
00499 // pointer value are not only dereferenceable but also definitively less than
00500 // or equal to the provided maximum size, then return true (constant global
00501 // values and allocas fall into this category). Otherwise, return false.
00502 //
00503 // FIXME: This should probably live in ValueTracking (or similar).
00504 static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
00505                                      const DataLayout &DL) {
00506   SmallPtrSet<Value *, 4> Visited;
00507   SmallVector<Value *, 4> Worklist(1, V);
00508 
00509   do {
00510     Value *P = Worklist.pop_back_val();
00511     P = P->stripPointerCasts();
00512 
00513     if (!Visited.insert(P).second)
00514       continue;
00515 
00516     if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
00517       Worklist.push_back(SI->getTrueValue());
00518       Worklist.push_back(SI->getFalseValue());
00519       continue;
00520     }
00521 
00522     if (PHINode *PN = dyn_cast<PHINode>(P)) {
00523       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
00524         Worklist.push_back(PN->getIncomingValue(i));
00525       continue;
00526     }
00527 
00528     if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
00529       if (GA->mayBeOverridden())
00530         return false;
00531       Worklist.push_back(GA->getAliasee());
00532       continue;
00533     }
00534 
00535     // If we know how big this object is, and it is no larger than MaxSize,
00536     // continue searching. Otherwise, return false.
00537     if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
00538       if (!AI->getAllocatedType()->isSized())
00539         return false;
00540 
00541       ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
00542       if (!CS)
00543         return false;
00544 
00545       uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
00546       // Make sure that, even if the multiplication below would wrap as a
00547       // uint64_t, we still do the right thing.
00548       if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
00549         return false;
00550       continue;
00551     }
00552 
00553     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
00554       if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
00555         return false;
00556 
00557       uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType());
00558       if (InitSize > MaxSize)
00559         return false;
00560       continue;
00561     }
00562 
00563     return false;
00564   } while (!Worklist.empty());
00565 
00566   return true;
00567 }
00568 
00569 // If we're indexing into an object of a known size, and the outer index is
00570 // not a constant, but having any value but zero would lead to undefined
00571 // behavior, replace it with zero.
00572 //
00573 // For example, if we have:
00574 // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
00575 // ...
00576 // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
00577 // ... = load i32* %arrayidx, align 4
00578 // Then we know that we can replace %x in the GEP with i64 0.
00579 //
00580 // FIXME: We could fold any GEP index to zero that would cause UB if it were
00581 // not zero. Currently, we only handle the first such index. We could also
00582 // search through non-zero constant indices if we kept track of the offsets
00583 // those indices implied.
00584 static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
00585                                      Instruction *MemI, unsigned &Idx) {
00586   if (GEPI->getNumOperands() < 2)
00587     return false;
00588 
00589   // Find the first non-zero index of a GEP. If all indices are zero, return
00590   // one past the last index.
00591   auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
00592     unsigned I = 1;
00593     for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
00594       Value *V = GEPI->getOperand(I);
00595       if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
00596         if (CI->isZero())
00597           continue;
00598 
00599       break;
00600     }
00601 
00602     return I;
00603   };
00604 
00605   // Skip through initial 'zero' indices, and find the corresponding pointer
00606   // type. See if the next index is not a constant.
00607   Idx = FirstNZIdx(GEPI);
00608   if (Idx == GEPI->getNumOperands())
00609     return false;
00610   if (isa<Constant>(GEPI->getOperand(Idx)))
00611     return false;
00612 
00613   SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
00614   Type *AllocTy = GetElementPtrInst::getIndexedType(
00615       cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType())
00616           ->getElementType(),
00617       Ops);
00618   if (!AllocTy || !AllocTy->isSized())
00619     return false;
00620   const DataLayout &DL = IC.getDataLayout();
00621   uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
00622 
00623   // If there are more indices after the one we might replace with a zero, make
00624   // sure they're all non-negative. If any of them are negative, the overall
00625   // address being computed might be before the base address determined by the
00626   // first non-zero index.
00627   auto IsAllNonNegative = [&]() {
00628     for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
00629       bool KnownNonNegative, KnownNegative;
00630       IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
00631                         KnownNegative, 0, MemI);
00632       if (KnownNonNegative)
00633         continue;
00634       return false;
00635     }
00636 
00637     return true;
00638   };
00639 
00640   // FIXME: If the GEP is not inbounds, and there are extra indices after the
00641   // one we'll replace, those could cause the address computation to wrap
00642   // (rendering the IsAllNonNegative() check below insufficient). We can do
00643 // better, ignoring zero indices (and other indices we can prove small
00644   // enough not to wrap).
00645   if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
00646     return false;
00647 
00648   // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
00649   // also known to be dereferenceable.
00650   return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
00651          IsAllNonNegative();
00652 }
00653 
00654 // If we're indexing into an object with a variable index for the memory
00655 // access, but the object has only one element, we can assume that the index
00656 // will always be zero. If we replace the GEP, return it.
00657 template <typename T>
00658 static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
00659                                           T &MemI) {
00660   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
00661     unsigned Idx;
00662     if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
00663       Instruction *NewGEPI = GEPI->clone();
00664       NewGEPI->setOperand(Idx,
00665         ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
00666       NewGEPI->insertBefore(GEPI);
00667       MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
00668       return NewGEPI;
00669     }
00670   }
00671 
00672   return nullptr;
00673 }
00674 
00675 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
00676   Value *Op = LI.getOperand(0);
00677 
00678   // Try to canonicalize the loaded type.
00679   if (Instruction *Res = combineLoadToOperationType(*this, LI))
00680     return Res;
00681 
00682   // Attempt to improve the alignment.
00683   unsigned KnownAlign = getOrEnforceKnownAlignment(
00684       Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
00685   unsigned LoadAlign = LI.getAlignment();
00686   unsigned EffectiveLoadAlign =
00687       LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
00688 
00689   if (KnownAlign > EffectiveLoadAlign)
00690     LI.setAlignment(KnownAlign);
00691   else if (LoadAlign == 0)
00692     LI.setAlignment(EffectiveLoadAlign);
00693 
00694   // Replace GEP indices if possible.
00695   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
00696       Worklist.Add(NewGEPI);
00697       return &LI;
00698   }
00699 
00700   // None of the following transforms are legal for volatile/atomic loads.
00701   // FIXME: Some of it is okay for atomic loads; needs refactoring.
00702   if (!LI.isSimple()) return nullptr;
00703 
00704   // Do really simple store-to-load forwarding and load CSE, to catch cases
00705   // where there are several consecutive memory accesses to the same location,
00706   // separated by a few arithmetic operations.
00707   BasicBlock::iterator BBI = &LI;
00708   if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
00709     return ReplaceInstUsesWith(
00710         LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
00711                                             LI.getName() + ".cast"));
00712 
00713   // load(gep null, ...) -> unreachable
00714   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
00715     const Value *GEPI0 = GEPI->getOperand(0);
00716     // TODO: Consider a target hook for valid address spaces for this xform.
00717     if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
00718       // Insert a new store to null instruction before the load to indicate
00719       // that this code is not reachable.  We do this instead of inserting
00720       // an unreachable instruction directly because we cannot modify the
00721       // CFG.
00722       new StoreInst(UndefValue::get(LI.getType()),
00723                     Constant::getNullValue(Op->getType()), &LI);
00724       return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
00725     }
00726   }
00727 
00728   // load null/undef -> unreachable
00729   // TODO: Consider a target hook for valid address spaces for this xform.
00730   if (isa<UndefValue>(Op) ||
00731       (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
00732     // Insert a new store to null instruction before the load to indicate that
00733     // this code is not reachable.  We do this instead of inserting an
00734     // unreachable instruction directly because we cannot modify the CFG.
00735     new StoreInst(UndefValue::get(LI.getType()),
00736                   Constant::getNullValue(Op->getType()), &LI);
00737     return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
00738   }
00739 
00740   if (Op->hasOneUse()) {
00741     // Change select and PHI nodes to select values instead of addresses: this
00742     // helps alias analysis out a lot, allows many other simplifications, and
00743     // exposes redundancy in the code.
00744     //
00745     // Note that we cannot do the transformation unless we know that the
00746     // introduced loads cannot trap!  Something like this is valid as long as
00747     // the condition is always false: load (select bool %C, int* null, int* %G),
00748     // but it would not be valid if we transformed it to load from null
00749     // unconditionally.
00750     //
00751     if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
00752       // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
00753       unsigned Align = LI.getAlignment();
00754       if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
00755           isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
00756         LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
00757                                            SI->getOperand(1)->getName()+".val");
00758         LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
00759                                            SI->getOperand(2)->getName()+".val");
00760         V1->setAlignment(Align);
00761         V2->setAlignment(Align);
00762         return SelectInst::Create(SI->getCondition(), V1, V2);
00763       }
00764 
00765       // load (select (cond, null, P)) -> load P
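            // (Legal in address space 0: if the select ever produced null, the
            // load would be UB anyway, so it must be producing P.)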
00766       if (isa<ConstantPointerNull>(SI->getOperand(1)) && 
00767           LI.getPointerAddressSpace() == 0) {
00768         LI.setOperand(0, SI->getOperand(2));
00769         return &LI;
00770       }
00771 
00772       // load (select (cond, P, null)) -> load P
00773       if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
00774           LI.getPointerAddressSpace() == 0) {
00775         LI.setOperand(0, SI->getOperand(1));
00776         return &LI;
00777       }
00778     }
00779   }
00780   return nullptr;
00781 }
00782 
00783 /// \brief Combine stores to match the type of value being stored.
00784 ///
00785 /// The core idea here is that the memory does not have any intrinsic type and
00786 /// where we can we should match the type of a store to the type of value being
00787 /// stored.
00788 ///
00789 /// However, this routine must never change the width of a store or the number of
00790 /// stores as that would introduce a semantic change. This combine is expected to
00791 /// be a semantic no-op which just allows stores to more closely model the types
00792 /// of their incoming values.
00793 ///
00794 /// Currently, we also refuse to change the precise type used for an atomic or
00795 /// volatile store. This is debatable, and might be reasonable to change later.
00796 /// However, it is risky in case some backend or other part of LLVM is relying
00797 /// on the exact type stored to select appropriate atomic operations.
00798 ///
00799 /// \returns true if the store was successfully combined away. This indicates
00800 /// the caller must erase the store instruction. We have to let the caller erase
00801 /// the store instruction, as otherwise there is no way to signal whether it was
00802 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
00803 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
00804   // FIXME: With some care we could probably handle both volatile and atomic
00805   // stores here, but it isn't clear that this is important.
00806   if (!SI.isSimple())
00807     return false;
00808 
00809   Value *V = SI.getValueOperand();
00810 
00811   // Fold away bit casts of the stored value by storing the original type.
00812   if (auto *BC = dyn_cast<BitCastInst>(V)) {
00813     V = BC->getOperand(0);
00814     combineStoreToNewValue(IC, SI, V);
00815     return true;
00816   }
00817 
00818   // FIXME: We should also canonicalize stores of vectors when their elements
00819   // are cast to other types.
00820   return false;
00821 }
00822 
00823 static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
00824   // FIXME: With some care we could probably handle both volatile and atomic
00825   // stores here, but it isn't clear that this is important.
00826   if (!SI.isSimple())
00827     return false;
00828 
00829   Value *V = SI.getValueOperand();
00830   Type *T = V->getType();
00831 
00832   if (!T->isAggregateType())
00833     return false;
00834 
00835   if (StructType *ST = dyn_cast<StructType>(T)) {
00836     // If the struct has only one element, we unpack it.
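          // e.g. (illustrative): a store of a one-element struct { i32 } becomes an
          // extractvalue plus a plain store of the single i32 member.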
00837     if (ST->getNumElements() == 1) {
00838       V = IC.Builder->CreateExtractValue(V, 0);
00839       combineStoreToNewValue(IC, SI, V);
00840       return true;
00841     }
00842   }
00843 
00844   return false;
00845 }
00846 
00847 /// equivalentAddressValues - Test if A and B will obviously have the same
00848 /// value. This includes recognizing that %t0 and %t1 will have the same
00849 /// value in code like this:
00850 ///   %t0 = getelementptr \@a, 0, 3
00851 ///   store i32 0, i32* %t0
00852 ///   %t1 = getelementptr \@a, 0, 3
00853 ///   %t2 = load i32* %t1
00854 ///
00855 static bool equivalentAddressValues(Value *A, Value *B) {
00856   // Test if the values are trivially equivalent.
00857   if (A == B) return true;
00858 
00859   // Test if the values come from identical arithmetic instructions.
00860   // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
00861   // it's only used to compare two uses within the same basic block, which
00862   // means that they'll always either have the same value or one of them
00863   // will have an undefined value.
00864   if (isa<BinaryOperator>(A) ||
00865       isa<CastInst>(A) ||
00866       isa<PHINode>(A) ||
00867       isa<GetElementPtrInst>(A))
00868     if (Instruction *BI = dyn_cast<Instruction>(B))
00869       if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
00870         return true;
00871 
00872   // Otherwise they may not be equivalent.
00873   return false;
00874 }
00875 
00876 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
00877   Value *Val = SI.getOperand(0);
00878   Value *Ptr = SI.getOperand(1);
00879 
00880   // Try to canonicalize the stored type.
00881   if (combineStoreToValueType(*this, SI))
00882     return EraseInstFromFunction(SI);
00883 
00884   // Attempt to improve the alignment.
00885   unsigned KnownAlign = getOrEnforceKnownAlignment(
00886       Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
00887   unsigned StoreAlign = SI.getAlignment();
00888   unsigned EffectiveStoreAlign =
00889       StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
00890 
00891   if (KnownAlign > EffectiveStoreAlign)
00892     SI.setAlignment(KnownAlign);
00893   else if (StoreAlign == 0)
00894     SI.setAlignment(EffectiveStoreAlign);
00895 
00896   // Try to unpack stores of aggregates into stores of their elements.
00897   if (unpackStoreToAggregate(*this, SI))
00898     return EraseInstFromFunction(SI);
00899 
00900   // Replace GEP indices if possible.
00901   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
00902       Worklist.Add(NewGEPI);
00903       return &SI;
00904   }
00905 
00906   // Don't hack volatile/atomic stores.
00907   // FIXME: Some bits are legal for atomic stores; needs refactoring.
00908   if (!SI.isSimple()) return nullptr;
00909 
00910   // If the RHS is an alloca with a single use, zapify the store, making the
00911   // alloca dead.
00912   if (Ptr->hasOneUse()) {
00913     if (isa<AllocaInst>(Ptr))
00914       return EraseInstFromFunction(SI);
00915     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
00916       if (isa<AllocaInst>(GEP->getOperand(0))) {
00917         if (GEP->getOperand(0)->hasOneUse())
00918           return EraseInstFromFunction(SI);
00919       }
00920     }
00921   }
00922 
00923   // Do really simple DSE, to catch cases where there are several consecutive
00924   // stores to the same location, separated by a few arithmetic operations. This
00925   // situation often occurs with bitfield accesses.
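        // e.g. back-to-back assignments to bitfields sharing a word produce two
        // stores to the same address, and only the later one needs to survive.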
00926   BasicBlock::iterator BBI = &SI;
00927   for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
00928        --ScanInsts) {
00929     --BBI;
00930     // Don't count debug info directives, lest they affect codegen,
00931     // and skip pointer-to-pointer bitcasts, which are no-ops.
00932     if (isa<DbgInfoIntrinsic>(BBI) ||
00933         (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
00934       ScanInsts++;
00935       continue;
00936     }
00937 
00938     if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
00939       // Prev store isn't volatile, and stores to the same location?
00940       if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
00941                                                         SI.getOperand(1))) {
00942         ++NumDeadStore;
00943         ++BBI;
00944         EraseInstFromFunction(*PrevSI);
00945         continue;
00946       }
00947       break;
00948     }
00949 
00950     // If this is a load, we have to stop.  However, if the loaded value is the
00951     // value we're storing and it was loaded from the pointer we're storing to,
00952     // then *this* store is dead (X = load P; store X -> P).
00953     if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
00954       if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
00955           LI->isSimple())
00956         return EraseInstFromFunction(SI);
00957 
00958       // Otherwise, this is a load from some other location.  Stores before it
00959       // may not be dead.
00960       break;
00961     }
00962 
00963     // Don't skip over loads or things that can modify memory.
00964     if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
00965       break;
00966   }
00967 
00968   // store X, null    -> turns into 'unreachable' in SimplifyCFG
00969   if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
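          // Replace the stored value with undef to drop our use of it, but keep the
          // store itself so SimplifyCFG can turn this block into 'unreachable'.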
00970     if (!isa<UndefValue>(Val)) {
00971       SI.setOperand(0, UndefValue::get(Val->getType()));
00972       if (Instruction *U = dyn_cast<Instruction>(Val))
00973         Worklist.Add(U);  // Dropped a use.
00974     }
00975     return nullptr;  // Do not modify these!
00976   }
00977 
00978   // store undef, Ptr -> noop
00979   if (isa<UndefValue>(Val))
00980     return EraseInstFromFunction(SI);
00981 
00982   // If this store is the last instruction in the basic block (possibly
00983   // excepting debug info instructions), and if the block ends with an
00984   // unconditional branch, try to move it to the successor block.
00985   BBI = &SI;
00986   do {
00987     ++BBI;
00988   } while (isa<DbgInfoIntrinsic>(BBI) ||
00989            (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
00990   if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
00991     if (BI->isUnconditional())
00992       if (SimplifyStoreAtEndOfBlock(SI))
00993         return nullptr;  // xform done!
00994 
00995   return nullptr;
00996 }
00997 
00998 /// SimplifyStoreAtEndOfBlock - Turn things like:
00999 ///   if () { *P = v1; } else { *P = v2 }
01000 /// into a phi node with a store in the successor.
01001 ///
01002 /// Simplify things like:
01003 ///   *P = v1; if () { *P = v2; }
01004 /// into a phi node with a store in the successor.
01005 ///
01006 bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
01007   BasicBlock *StoreBB = SI.getParent();
01008 
01009   // Check to see if the successor block has exactly two incoming edges.  If
01010   // so, see if the other predecessor contains a store to the same location.
01011   // If so, insert a PHI node (if needed) and move the stores down.
01012   BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
01013 
01014   // Determine whether Dest has exactly two predecessors and, if so, compute
01015   // the other predecessor.
01016   pred_iterator PI = pred_begin(DestBB);
01017   BasicBlock *P = *PI;
01018   BasicBlock *OtherBB = nullptr;
01019 
01020   if (P != StoreBB)
01021     OtherBB = P;
01022 
01023   if (++PI == pred_end(DestBB))
01024     return false;
01025 
01026   P = *PI;
01027   if (P != StoreBB) {
01028     if (OtherBB)
01029       return false;
01030     OtherBB = P;
01031   }
01032   if (++PI != pred_end(DestBB))
01033     return false;
01034 
01035   // Bail out if the relevant blocks aren't all distinct (this can happen,
01036   // for example, if SI is in an infinite loop)
01037   if (StoreBB == DestBB || OtherBB == DestBB)
01038     return false;
01039 
01040   // Verify that the other block ends in a branch and is not otherwise empty.
01041   BasicBlock::iterator BBI = OtherBB->getTerminator();
01042   BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
01043   if (!OtherBr || BBI == OtherBB->begin())
01044     return false;
01045 
01046   // If the other block ends in an unconditional branch, check for the 'if then
01047   // else' case.  We already know there is an instruction before the branch.
01048   StoreInst *OtherStore = nullptr;
01049   if (OtherBr->isUnconditional()) {
01050     --BBI;
01051     // Skip over debugging info.
01052     while (isa<DbgInfoIntrinsic>(BBI) ||
01053            (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
01054       if (BBI==OtherBB->begin())
01055         return false;
01056       --BBI;
01057     }
01058     // If this isn't a store, isn't a store to the same location, or is not the
01059     // right kind of store, bail out.
01060     OtherStore = dyn_cast<StoreInst>(BBI);
01061     if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
01062         !SI.isSameOperationAs(OtherStore))
01063       return false;
01064   } else {
01065     // Otherwise, the other block ended with a conditional branch. If one of the
01066     // destinations is StoreBB, then we have the if/then case.
01067     if (OtherBr->getSuccessor(0) != StoreBB &&
01068         OtherBr->getSuccessor(1) != StoreBB)
01069       return false;
01070 
01071     // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
01072     // if/then triangle.  See if there is a store to the same ptr as SI that
01073     // lives in OtherBB.
01074     for (;; --BBI) {
01075       // Check to see if we find the matching store.
01076       if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
01077         if (OtherStore->getOperand(1) != SI.getOperand(1) ||
01078             !SI.isSameOperationAs(OtherStore))
01079           return false;
01080         break;
01081       }
01082       // If we find something that may be using or overwriting the stored
01083       // value, or if we run out of instructions, we can't do the xform.
01084       if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
01085           BBI == OtherBB->begin())
01086         return false;
01087     }
01088 
01089     // In order to eliminate the store in OtherBr, we have to
01090     // make sure nothing reads or overwrites the stored value in
01091     // StoreBB.
01092     for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
01093       // FIXME: This should really be AA driven.
01094       if (I->mayReadFromMemory() || I->mayWriteToMemory())
01095         return false;
01096     }
01097   }
01098 
01099   // Insert a PHI node now if we need it.
01100   Value *MergedVal = OtherStore->getOperand(0);
01101   if (MergedVal != SI.getOperand(0)) {
01102     PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
01103     PN->addIncoming(SI.getOperand(0), SI.getParent());
01104     PN->addIncoming(OtherStore->getOperand(0), OtherBB);
01105     MergedVal = InsertNewInstBefore(PN, DestBB->front());
01106   }
01107 
01108   // Advance to a place where it is safe to insert the new store and
01109   // insert it.
01110   BBI = DestBB->getFirstInsertionPt();
01111   StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
01112                                    SI.isVolatile(),
01113                                    SI.getAlignment(),
01114                                    SI.getOrdering(),
01115                                    SI.getSynchScope());
01116   InsertNewInstBefore(NewSI, *BBI);
01117   NewSI->setDebugLoc(OtherStore->getDebugLoc());
01118 
01119   // If the two stores had AA tags, merge them.
01120   AAMDNodes AATags;
01121   SI.getAAMetadata(AATags);
01122   if (AATags) {
01123     OtherStore->getAAMetadata(AATags, /* Merge = */ true);
01124     NewSI->setAAMetadata(AATags);
01125   }
01126 
01127   // Nuke the old stores.
01128   EraseInstFromFunction(SI);
01129   EraseInstFromFunction(*OtherStore);
01130   return true;
01131 }