
InstCombineLoadStoreAlloca.cpp
00001 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the visit functions for load, store and alloca.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "InstCombineInternal.h"
00015 #include "llvm/ADT/Statistic.h"
00016 #include "llvm/Analysis/Loads.h"
00017 #include "llvm/IR/DataLayout.h"
00018 #include "llvm/IR/LLVMContext.h"
00019 #include "llvm/IR/IntrinsicInst.h"
00020 #include "llvm/IR/MDBuilder.h"
00021 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
00022 #include "llvm/Transforms/Utils/Local.h"
00023 using namespace llvm;
00024 
00025 #define DEBUG_TYPE "instcombine"
00026 
00027 STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
00028 STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
00029 
00030 /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
00031 /// some part of a constant global variable.  This intentionally only accepts
00032 /// constant expressions because we can't rewrite arbitrary instructions.
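// [Editor's note: illustrative sketch, not part of the original source; @cst is a
// made-up name.] A constant expression such as
//
//   getelementptr inbounds ([4 x i32]* @cst, i64 0, i64 2)
//
// is accepted when @cst is a constant global, because each step is a constant
// bitcast/addrspacecast/GEP over it; a pointer produced by an instruction is
// rejected, since we can't rewrite arbitrary instructions.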
00033 static bool pointsToConstantGlobal(Value *V) {
00034   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
00035     return GV->isConstant();
00036 
00037   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
00038     if (CE->getOpcode() == Instruction::BitCast ||
00039         CE->getOpcode() == Instruction::AddrSpaceCast ||
00040         CE->getOpcode() == Instruction::GetElementPtr)
00041       return pointsToConstantGlobal(CE->getOperand(0));
00042   }
00043   return false;
00044 }
00045 
00046 /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
00047 /// pointer to an alloca.  Ignore any reads of the pointer; return false if we
00048 /// see any stores or other unknown uses.  If we see pointer arithmetic, keep
00049 /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
00050 /// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
00051 /// the alloca, and if the source pointer is a pointer to a constant global, we
00052 /// can optimize this.
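// [Editor's note: illustrative sketch, not part of the original source; the names
// @cst and %buf and the sizes are made up.] The shape of IR this walk looks for:
//
//   @cst = private unnamed_addr constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
//   ...
//   %buf = alloca [4 x i32]
//   %buf.i8 = bitcast [4 x i32]* %buf to i8*
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %buf.i8,
//                i8* bitcast ([4 x i32]* @cst to i8*), i64 16, i32 4, i1 false)
//   ; ... only reads of %buf from here on ...
//
// If every other use of %buf is a read, TheCopy is set to the memcpy and the
// caller may redirect the reads to @cst.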
00053 static bool
00054 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
00055                                SmallVectorImpl<Instruction *> &ToDelete) {
00056   // We track lifetime intrinsics as we encounter them.  If we decide to go
00057   // ahead and replace the value with the global, this lets the caller quickly
00058   // eliminate the markers.
00059 
00060   SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
00061   ValuesToInspect.push_back(std::make_pair(V, false));
00062   while (!ValuesToInspect.empty()) {
00063     auto ValuePair = ValuesToInspect.pop_back_val();
00064     const bool IsOffset = ValuePair.second;
00065     for (auto &U : ValuePair.first->uses()) {
00066       Instruction *I = cast<Instruction>(U.getUser());
00067 
00068       if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
00069         // Ignore non-volatile loads; they are always ok.
00070         if (!LI->isSimple()) return false;
00071         continue;
00072       }
00073 
00074       if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
00075         // If uses of the bitcast are ok, we are ok.
00076         ValuesToInspect.push_back(std::make_pair(I, IsOffset));
00077         continue;
00078       }
00079       if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
00080         // If the GEP has all zero indices, it doesn't offset the pointer;
00081         // otherwise, it does.
00082         ValuesToInspect.push_back(
00083             std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
00084         continue;
00085       }
00086 
00087       if (CallSite CS = I) {
00088         // If this is the function being called then we treat it like a load and
00089         // ignore it.
00090         if (CS.isCallee(&U))
00091           continue;
00092 
00093         // Inalloca arguments are clobbered by the call.
00094         unsigned ArgNo = CS.getArgumentNo(&U);
00095         if (CS.isInAllocaArgument(ArgNo))
00096           return false;
00097 
00098         // If this is a readonly/readnone call site, then we know it is just a
00099         // load (but one that potentially returns the value itself), so we can
00100         // ignore it if we know that the value isn't captured.
00101         if (CS.onlyReadsMemory() &&
00102             (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
00103           continue;
00104 
00105         // If this is being passed as a byval argument, the caller is making a
00106         // copy, so it is only a read of the alloca.
00107         if (CS.isByValArgument(ArgNo))
00108           continue;
00109       }
00110 
00111       // Lifetime intrinsics can be handled by the caller.
00112       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
00113         if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
00114             II->getIntrinsicID() == Intrinsic::lifetime_end) {
00115           assert(II->use_empty() && "Lifetime markers have no result to use!");
00116           ToDelete.push_back(II);
00117           continue;
00118         }
00119       }
00120 
00121       // If this isn't our memcpy/memmove, reject it as something we can't
00122       // handle.
00123       MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
00124       if (!MI)
00125         return false;
00126 
00127       // If the transfer is using the alloca as a source of the transfer, then
00128       // ignore it since it is a load (unless the transfer is volatile).
00129       if (U.getOperandNo() == 1) {
00130         if (MI->isVolatile()) return false;
00131         continue;
00132       }
00133 
00134       // If we already have seen a copy, reject the second one.
00135       if (TheCopy) return false;
00136 
00137       // If the pointer has been offset from the start of the alloca, we can't
00138       // safely handle this.
00139       if (IsOffset) return false;
00140 
00141       // If the memintrinsic isn't using the alloca as the dest, reject it.
00142       if (U.getOperandNo() != 0) return false;
00143 
00144       // If the source of the memcpy/move is not a constant global, reject it.
00145       if (!pointsToConstantGlobal(MI->getSource()))
00146         return false;
00147 
00148       // Otherwise, the transform is safe.  Remember the copy instruction.
00149       TheCopy = MI;
00150     }
00151   }
00152   return true;
00153 }
00154 
00155 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
00156 /// modified by a copy from a constant global.  If we can prove this, we can
00157 /// replace any uses of the alloca with uses of the global directly.
00158 static MemTransferInst *
00159 isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
00160                                SmallVectorImpl<Instruction *> &ToDelete) {
00161   MemTransferInst *TheCopy = nullptr;
00162   if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
00163     return TheCopy;
00164   return nullptr;
00165 }
00166 
00167 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
00168   // Ensure that the alloca array size argument has type intptr_t, so that
00169   // any casting is exposed early.
00170   if (DL) {
00171     Type *IntPtrTy = DL->getIntPtrType(AI.getType());
00172     if (AI.getArraySize()->getType() != IntPtrTy) {
00173       Value *V = Builder->CreateIntCast(AI.getArraySize(),
00174                                         IntPtrTy, false);
00175       AI.setOperand(0, V);
00176       return &AI;
00177     }
00178   }
00179 
00180   // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
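  // [Editor's note: illustrative before/after, not from the original source;
  // value names are made up.] For example:
  //
  //   %a = alloca i32, i32 4
  //
  // becomes
  //
  //   %1 = alloca [4 x i32]
  //   %a.sub = getelementptr inbounds [4 x i32]* %1, i64 0, i64 0
  //
  // with all uses of %a rewritten to use %a.sub.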
00181   if (AI.isArrayAllocation()) {  // Check C != 1
00182     if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
00183       Type *NewTy =
00184         ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
00185       AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
00186       New->setAlignment(AI.getAlignment());
00187 
00188       // Scan to the end of the allocation instructions, to skip over a block of
00189       // allocas if possible...also skip interleaved debug info
00190       //
00191       BasicBlock::iterator It = New;
00192       while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
00193 
00194       // Now that It is pointing to the first non-allocation-inst in the block,
00195       // insert our getelementptr instruction...
00196       //
00197       Type *IdxTy = DL
00198                   ? DL->getIntPtrType(AI.getType())
00199                   : Type::getInt64Ty(AI.getContext());
00200       Value *NullIdx = Constant::getNullValue(IdxTy);
00201       Value *Idx[2] = { NullIdx, NullIdx };
00202       Instruction *GEP =
00203         GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
00204       InsertNewInstBefore(GEP, *It);
00205 
00206       // Now make everything use the getelementptr instead of the original
00207       // allocation.
00208       return ReplaceInstUsesWith(AI, GEP);
00209     } else if (isa<UndefValue>(AI.getArraySize())) {
00210       return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
00211     }
00212   }
00213 
00214   if (DL && AI.getAllocatedType()->isSized()) {
00215     // If the alignment is 0 (unspecified), assign it the preferred alignment.
00216     if (AI.getAlignment() == 0)
00217       AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));
00218 
00219     // Move all allocas of zero byte objects to the entry block and merge them
00220     // together.  Note that we only do this for allocas, because malloc should
00221     // allocate and return a unique pointer, even for a zero byte allocation.
00222     if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
00223       // For a zero sized alloca there is no point in doing an array allocation.
00224       // This is helpful if the array size is a complicated expression not used
00225       // elsewhere.
00226       if (AI.isArrayAllocation()) {
00227         AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
00228         return &AI;
00229       }
00230 
00231       // Get the first instruction in the entry block.
00232       BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
00233       Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
00234       if (FirstInst != &AI) {
00235         // If the entry block doesn't start with a zero-size alloca then move
00236         // this one to the start of the entry block.  There is no problem with
00237         // dominance as the array size was forced to a constant earlier already.
00238         AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
00239         if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
00240             DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
00241           AI.moveBefore(FirstInst);
00242           return &AI;
00243         }
00244 
00245         // If the alignment of the entry block alloca is 0 (unspecified),
00246         // assign it the preferred alignment.
00247         if (EntryAI->getAlignment() == 0)
00248           EntryAI->setAlignment(
00249             DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
00250         // Replace this zero-sized alloca with the one at the start of the entry
00251         // block after ensuring that the address will be aligned enough for both
00252         // types.
00253         unsigned MaxAlign = std::max(EntryAI->getAlignment(),
00254                                      AI.getAlignment());
00255         EntryAI->setAlignment(MaxAlign);
00256         if (AI.getType() != EntryAI->getType())
00257           return new BitCastInst(EntryAI, AI.getType());
00258         return ReplaceInstUsesWith(AI, EntryAI);
00259       }
00260     }
00261   }
00262 
00263   if (AI.getAlignment()) {
00264     // Check to see if this allocation is only modified by a memcpy/memmove from
00265     // a constant global whose alignment is equal to or exceeds that of the
00266     // allocation.  If this is the case, we can change all users to use
00267     // the constant global instead.  This is commonly produced by the CFE by
00268     // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
00269     // is only subsequently read.
00270     SmallVector<Instruction *, 4> ToDelete;
00271     if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
00272       unsigned SourceAlign = getOrEnforceKnownAlignment(
00273           Copy->getSource(), AI.getAlignment(), DL, AC, &AI, DT);
00274       if (AI.getAlignment() <= SourceAlign) {
00275         DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
00276         DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
00277         for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
00278           EraseInstFromFunction(*ToDelete[i]);
00279         Constant *TheSrc = cast<Constant>(Copy->getSource());
00280         Constant *Cast
00281           = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
00282         Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
00283         EraseInstFromFunction(*Copy);
00284         ++NumGlobalCopies;
00285         return NewI;
00286       }
00287     }
00288   }
00289 
00290   // At last, use the generic allocation site handler to aggressively remove
00291   // unused allocas.
00292   return visitAllocSite(AI);
00293 }
00294 
00295 /// \brief Helper to combine a load to a new type.
00296 ///
00297 /// This just does the work of combining a load to a new type. It handles
00298 /// metadata, etc., and returns the new instruction. The \c NewTy should be the
00299 /// loaded *value* type. This will convert it to a pointer, cast the operand to
00300 /// that pointer type, load it, etc.
00301 ///
00302 /// Note that this will create all of the instructions with whatever insert
00303 /// point the \c InstCombiner currently is using.
00304 static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy) {
00305   Value *Ptr = LI.getPointerOperand();
00306   unsigned AS = LI.getPointerAddressSpace();
00307   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
00308   LI.getAllMetadata(MD);
00309 
00310   LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
00311       IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
00312       LI.getAlignment(), LI.getName());
00313   MDBuilder MDB(NewLoad->getContext());
00314   for (const auto &MDPair : MD) {
00315     unsigned ID = MDPair.first;
00316     MDNode *N = MDPair.second;
00317     // Note, essentially every kind of metadata should be preserved here! This
00318     // routine is supposed to clone a load instruction changing *only its type*.
00319     // The only metadata it makes sense to drop is metadata which is invalidated
00320     // when the pointer type changes. This should essentially never be the case
00321     // in LLVM, but we explicitly switch over only known metadata to be
00322     // conservatively correct. If you are adding metadata to LLVM which pertains
00323     // to loads, you almost certainly want to add it here.
00324     switch (ID) {
00325     case LLVMContext::MD_dbg:
00326     case LLVMContext::MD_tbaa:
00327     case LLVMContext::MD_prof:
00328     case LLVMContext::MD_fpmath:
00329     case LLVMContext::MD_tbaa_struct:
00330     case LLVMContext::MD_invariant_load:
00331     case LLVMContext::MD_alias_scope:
00332     case LLVMContext::MD_noalias:
00333     case LLVMContext::MD_nontemporal:
00334     case LLVMContext::MD_mem_parallel_loop_access:
00335       // All of these directly apply.
00336       NewLoad->setMetadata(ID, N);
00337       break;
00338 
00339     case LLVMContext::MD_nonnull:
00340       // This only directly applies if the new type is also a pointer.
00341       if (NewTy->isPointerTy()) {
00342         NewLoad->setMetadata(ID, N);
00343         break;
00344       }
00345       // If it's integral now, translate it to !range metadata.
00346       if (NewTy->isIntegerTy()) {
00347         auto *ITy = cast<IntegerType>(NewTy);
00348         auto *NullInt = ConstantExpr::getPtrToInt(
00349             ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
00350         auto *NonNullInt =
00351             ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
00352         NewLoad->setMetadata(LLVMContext::MD_range,
00353                              MDB.createRange(NonNullInt, NullInt));
00354       }
00355       break;
00356 
00357     case LLVMContext::MD_range:
00358       // FIXME: It would be nice to propagate this in some way, but the type
00359       // conversions make it hard. If the new type is a pointer, we could
00360       // translate it to !nonnull metadata.
00361       break;
00362     }
00363   }
00364   return NewLoad;
00365 }
00366 
00367 /// \brief Combine a store to a new type.
00368 ///
00369 /// Returns the newly created store instruction.
00370 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
00371   Value *Ptr = SI.getPointerOperand();
00372   unsigned AS = SI.getPointerAddressSpace();
00373   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
00374   SI.getAllMetadata(MD);
00375 
00376   StoreInst *NewStore = IC.Builder->CreateAlignedStore(
00377       V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
00378       SI.getAlignment());
00379   for (const auto &MDPair : MD) {
00380     unsigned ID = MDPair.first;
00381     MDNode *N = MDPair.second;
00382     // Note, essentially every kind of metadata should be preserved here! This
00383     // routine is supposed to clone a store instruction changing *only its
00384     // type*. The only metadata it makes sense to drop is metadata which is
00385     // invalidated when the pointer type changes. This should essentially
00386     // never be the case in LLVM, but we explicitly switch over only known
00387     // metadata to be conservatively correct. If you are adding metadata to
00388     // LLVM which pertains to stores, you almost certainly want to add it
00389     // here.
00390     switch (ID) {
00391     case LLVMContext::MD_dbg:
00392     case LLVMContext::MD_tbaa:
00393     case LLVMContext::MD_prof:
00394     case LLVMContext::MD_fpmath:
00395     case LLVMContext::MD_tbaa_struct:
00396     case LLVMContext::MD_alias_scope:
00397     case LLVMContext::MD_noalias:
00398     case LLVMContext::MD_nontemporal:
00399     case LLVMContext::MD_mem_parallel_loop_access:
00400       // All of these directly apply.
00401       NewStore->setMetadata(ID, N);
00402       break;
00403 
00404     case LLVMContext::MD_invariant_load:
00405     case LLVMContext::MD_nonnull:
00406     case LLVMContext::MD_range:
00407       // These don't apply for stores.
00408       break;
00409     }
00410   }
00411 
00412   return NewStore;
00413 }
00414 
00415 /// \brief Combine loads to match the type of their uses' values after looking
00416 /// through intervening bitcasts.
00417 ///
00418 /// The core idea here is that if the result of a load is used in an operation,
00419 /// we should load the type most conducive to that operation. For example, when
00420 /// loading an integer and converting that immediately to a pointer, we should
00421 /// instead directly load a pointer.
00422 ///
00423 /// However, this routine must never change the width of a load or the number of
00424 /// loads as that would introduce a semantic change. This combine is expected to
00425 /// be a semantic no-op which just allows loads to more closely model the types
00426 /// of their consuming operations.
00427 ///
00428 /// Currently, we also refuse to change the precise type used for an atomic load
00429 /// or a volatile load. This is debatable, and might be reasonable to change
00430 /// later. However, it is risky in case some backend or other part of LLVM is
00431 /// relying on the exact type loaded to select appropriate atomic operations.
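// [Editor's note: illustrative before/after, not from the original source; value
// names are made up.] The single-use bitcast case handled below turns
//
//   %x = load float* %p
//   %y = bitcast float %x to i32
//
// into (modulo value names)
//
//   %p.cast = bitcast float* %p to i32*
//   %y = load i32* %p.cast
//
// which keeps the same width and the same number of loads, just a type that
// matches the use.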
00432 static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
00433   // FIXME: We could probably with some care handle both volatile and atomic
00434   // loads here but it isn't clear that this is important.
00435   if (!LI.isSimple())
00436     return nullptr;
00437 
00438   if (LI.use_empty())
00439     return nullptr;
00440 
00441   Type *Ty = LI.getType();
00442 
00443   // Try to canonicalize loads which are only ever stored to operate over
00444   // integers instead of any other type. We only do this when the loaded type
00445   // is sized and has a size exactly the same as its store size and the store
00446   // size is a legal integer type.
00447   const DataLayout *DL = IC.getDataLayout();
00448   if (!Ty->isIntegerTy() && Ty->isSized() && DL &&
00449       DL->isLegalInteger(DL->getTypeStoreSizeInBits(Ty)) &&
00450       DL->getTypeStoreSizeInBits(Ty) == DL->getTypeSizeInBits(Ty)) {
00451     if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
00452           auto *SI = dyn_cast<StoreInst>(U);
00453           return SI && SI->getPointerOperand() != &LI;
00454         })) {
00455       LoadInst *NewLoad = combineLoadToNewType(
00456           IC, LI,
00457           Type::getIntNTy(LI.getContext(), DL->getTypeStoreSizeInBits(Ty)));
00458       // Replace all the stores with stores of the newly loaded value.
00459       for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
00460         auto *SI = cast<StoreInst>(*UI++);
00461         IC.Builder->SetInsertPoint(SI);
00462         combineStoreToNewValue(IC, *SI, NewLoad);
00463         IC.EraseInstFromFunction(*SI);
00464       }
00465       assert(LI.use_empty() && "Failed to remove all users of the load!");
00466       // Return the old load so the combiner can delete it safely.
00467       return &LI;
00468     }
00469   }
00470 
00471   // Fold away bit casts of the loaded value by loading the desired type.
00472   if (LI.hasOneUse())
00473     if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) {
00474       LoadInst *NewLoad = combineLoadToNewType(IC, LI, BC->getDestTy());
00475       BC->replaceAllUsesWith(NewLoad);
00476       IC.EraseInstFromFunction(*BC);
00477       return &LI;
00478     }
00479 
00480   // FIXME: We should also canonicalize loads of vectors when their elements are
00481   // cast to other types.
00482   return nullptr;
00483 }
00484 
00485 // If we can determine that all possible objects pointed to by the provided
00486 // pointer value are not only dereferenceable, but also definitively less than
00487 // or equal to the provided maximum size, then return true (constant global
00488 // values and allocas are the cases this can prove). Otherwise, return false.
00489 //
00490 // FIXME: This should probably live in ValueTracking (or similar).
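// [Editor's note: illustrative, not from the original source; @g is a made-up
// name.] Given
//
//   @g = internal constant [2 x i32] [i32 1, i32 2]
//
// this returns true for (@g, MaxSize >= 8), and false for any pointer whose
// underlying objects cannot all be enumerated and bounded (e.g. a plain function
// argument).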
00491 static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
00492                                      const DataLayout *DL) {
00493   SmallPtrSet<Value *, 4> Visited;
00494   SmallVector<Value *, 4> Worklist(1, V);
00495 
00496   do {
00497     Value *P = Worklist.pop_back_val();
00498     P = P->stripPointerCasts();
00499 
00500     if (!Visited.insert(P).second)
00501       continue;
00502 
00503     if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
00504       Worklist.push_back(SI->getTrueValue());
00505       Worklist.push_back(SI->getFalseValue());
00506       continue;
00507     }
00508 
00509     if (PHINode *PN = dyn_cast<PHINode>(P)) {
00510       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
00511         Worklist.push_back(PN->getIncomingValue(i));
00512       continue;
00513     }
00514 
00515     if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
00516       if (GA->mayBeOverridden())
00517         return false;
00518       Worklist.push_back(GA->getAliasee());
00519       continue;
00520     }
00521 
00522     // If we know how big this object is, and it is less than MaxSize, continue
00523     // searching. Otherwise, return false.
00524     if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
00525       if (!AI->getAllocatedType()->isSized())
00526         return false;
00527 
00528       ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
00529       if (!CS)
00530         return false;
00531 
00532       uint64_t TypeSize = DL->getTypeAllocSize(AI->getAllocatedType());
00533       // Make sure that, even if the multiplication below would wrap as a
00534       // uint64_t, we still do the right thing.
00535       if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
00536         return false;
00537       continue;
00538     }
00539 
00540     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
00541       if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
00542         return false;
00543 
00544       uint64_t InitSize = DL->getTypeAllocSize(GV->getType()->getElementType());
00545       if (InitSize > MaxSize)
00546         return false;
00547       continue;
00548     }
00549 
00550     return false;
00551   } while (!Worklist.empty());
00552 
00553   return true;
00554 }
00555 
00556 // If we're indexing into an object of a known size, and the outer index is
00557 // not a constant, but having any value but zero would lead to undefined
00558 // behavior, replace it with zero.
00559 //
00560 // For example, if we have:
00561 // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
00562 // ...
00563 // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
00564 // ... = load i32* %arrayidx, align 4
00565 // Then we know that we can replace %x in the GEP with i64 0.
00566 //
00567 // FIXME: We could fold any GEP index to zero that would cause UB if it were
00568 // not zero. Currently, we only handle the first such index. We could also
00569 // search through non-zero constant indices if we kept track of the
00570 // offsets those indices implied.
00571 static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
00572                                      Instruction *MemI, unsigned &Idx) {
00573   const DataLayout *DL = IC.getDataLayout();
00574   if (GEPI->getNumOperands() < 2 || !DL)
00575     return false;
00576 
00577   // Find the first non-zero index of a GEP. If all indices are zero, return
00578   // one past the last index.
00579   auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
00580     unsigned I = 1;
00581     for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
00582       Value *V = GEPI->getOperand(I);
00583       if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
00584         if (CI->isZero())
00585           continue;
00586 
00587       break;
00588     }
00589 
00590     return I;
00591   };
00592 
00593   // Skip through initial 'zero' indices, and find the corresponding pointer
00594   // type. See if the next index is not a constant.
00595   Idx = FirstNZIdx(GEPI);
00596   if (Idx == GEPI->getNumOperands())
00597     return false;
00598   if (isa<Constant>(GEPI->getOperand(Idx)))
00599     return false;
00600 
00601   SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
00602   Type *AllocTy =
00603     GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops);
00604   if (!AllocTy || !AllocTy->isSized())
00605     return false;
00606   uint64_t TyAllocSize = DL->getTypeAllocSize(AllocTy);
00607 
00608   // If there are more indices after the one we might replace with a zero, make
00609   // sure they're all non-negative. If any of them are negative, the overall
00610   // address being computed might be before the base address determined by the
00611   // first non-zero index.
00612   auto IsAllNonNegative = [&]() {
00613     for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
00614       bool KnownNonNegative, KnownNegative;
00615       IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
00616                         KnownNegative, 0, MemI);
00617       if (KnownNonNegative)
00618         continue;
00619       return false;
00620     }
00621 
00622     return true;
00623   };
00624 
00625   // FIXME: If the GEP is not inbounds, and there are extra indices after the
00626   // one we'll replace, those could cause the address computation to wrap
00627   // (rendering the IsAllNonNegative() check below insufficient). We can do
00628   // better, ignoring zero indices (and other indices we can prove small
00629   // enough not to wrap).
00630   if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
00631     return false;
00632 
00633   // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
00634   // also known to be dereferenceable.
00635   return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
00636          IsAllNonNegative();
00637 }
00638 
00639 // If we're indexing into an object with a variable index for the memory
00640 // access, but the object has only one element, we can assume that the index
00641 // will always be zero. If we replace the GEP, return it.
00642 template <typename T>
00643 static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
00644                                           T &MemI) {
00645   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
00646     unsigned Idx;
00647     if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
00648       Instruction *NewGEPI = GEPI->clone();
00649       NewGEPI->setOperand(Idx,
00650         ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
00651       NewGEPI->insertBefore(GEPI);
00652       MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
00653       return NewGEPI;
00654     }
00655   }
00656 
00657   return nullptr;
00658 }
00659 
00660 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
00661   Value *Op = LI.getOperand(0);
00662 
00663   // Try to canonicalize the loaded type.
00664   if (Instruction *Res = combineLoadToOperationType(*this, LI))
00665     return Res;
00666 
00667   // Attempt to improve the alignment.
00668   if (DL) {
00669     unsigned KnownAlign = getOrEnforceKnownAlignment(
00670         Op, DL->getPrefTypeAlignment(LI.getType()), DL, AC, &LI, DT);
00671     unsigned LoadAlign = LI.getAlignment();
00672     unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
00673       DL->getABITypeAlignment(LI.getType());
00674 
00675     if (KnownAlign > EffectiveLoadAlign)
00676       LI.setAlignment(KnownAlign);
00677     else if (LoadAlign == 0)
00678       LI.setAlignment(EffectiveLoadAlign);
00679   }
00680 
00681   // Replace GEP indices if possible.
00682   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
00683       Worklist.Add(NewGEPI);
00684       return &LI;
00685   }
00686 
00687   // None of the following transforms are legal for volatile/atomic loads.
00688   // FIXME: Some of it is okay for atomic loads; needs refactoring.
00689   if (!LI.isSimple()) return nullptr;
00690 
00691   // Do really simple store-to-load forwarding and load CSE, to catch cases
00692   // where there are several consecutive memory accesses to the same location,
00693   // separated by a few arithmetic operations.
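  // [Editor's note: illustrative, not from the original source; value names are
  // made up.]
  //
  //   store i32 %v, i32* %p
  //   ; ... a few instructions that don't touch memory ...
  //   %x = load i32* %p         ; %x is replaced by %v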
00694   BasicBlock::iterator BBI = &LI;
00695   if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
00696     return ReplaceInstUsesWith(
00697         LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
00698                                             LI.getName() + ".cast"));
00699 
00700   // load(gep null, ...) -> unreachable
00701   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
00702     const Value *GEPI0 = GEPI->getOperand(0);
00703     // TODO: Consider a target hook for valid address spaces for this xform.
00704     if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
00705       // Insert a new store to null instruction before the load to indicate
00706       // that this code is not reachable.  We do this instead of inserting
00707       // an unreachable instruction directly because we cannot modify the
00708       // CFG.
00709       new StoreInst(UndefValue::get(LI.getType()),
00710                     Constant::getNullValue(Op->getType()), &LI);
00711       return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
00712     }
00713   }
00714 
00715   // load null/undef -> unreachable
00716   // TODO: Consider a target hook for valid address spaces for this xform.
00717   if (isa<UndefValue>(Op) ||
00718       (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
00719     // Insert a new store to null instruction before the load to indicate that
00720     // this code is not reachable.  We do this instead of inserting an
00721     // unreachable instruction directly because we cannot modify the CFG.
00722     new StoreInst(UndefValue::get(LI.getType()),
00723                   Constant::getNullValue(Op->getType()), &LI);
00724     return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
00725   }
00726 
00727   if (Op->hasOneUse()) {
00728     // Change select and PHI nodes to select values instead of addresses: this
00729     // helps alias analysis out a lot, allows many other simplifications, and
00730     // exposes redundancy in the code.
00731     //
00732     // Note that we cannot do the transformation unless we know that the
00733     // introduced loads cannot trap!  Something like this is valid as long as
00734     // the condition is always false: load (select bool %C, int* null, int* %G),
00735     // but it would not be valid if we transformed it to load from null
00736     // unconditionally.
00737     //
00738     if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
00739       // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
00740       unsigned Align = LI.getAlignment();
00741       if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
00742           isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
00743         LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
00744                                            SI->getOperand(1)->getName()+".val");
00745         LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
00746                                            SI->getOperand(2)->getName()+".val");
00747         V1->setAlignment(Align);
00748         V2->setAlignment(Align);
00749         return SelectInst::Create(SI->getCondition(), V1, V2);
00750       }
00751 
00752       // load (select (cond, null, P)) -> load P
00753       if (isa<ConstantPointerNull>(SI->getOperand(1)) && 
00754           LI.getPointerAddressSpace() == 0) {
00755         LI.setOperand(0, SI->getOperand(2));
00756         return &LI;
00757       }
00758 
00759       // load (select (cond, P, null)) -> load P
00760       if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
00761           LI.getPointerAddressSpace() == 0) {
00762         LI.setOperand(0, SI->getOperand(1));
00763         return &LI;
00764       }
00765     }
00766   }
00767   return nullptr;
00768 }
00769 
00770 /// \brief Combine stores to match the type of value being stored.
00771 ///
00772 /// The core idea here is that the memory does not have any intrinsic type and,
00773 /// where we can, we should match the type of a store to the type of the value
00774 /// being stored.
00775 ///
00776 /// However, this routine must never change the width of a store or the number of
00777 /// stores as that would introduce a semantic change. This combine is expected to
00778 /// be a semantic no-op which just allows stores to more closely model the types
00779 /// of their incoming values.
00780 ///
00781 /// Currently, we also refuse to change the precise type used for an atomic or
00782 /// volatile store. This is debatable, and might be reasonable to change later.
00783 /// However, it is risky in case some backend or other part of LLVM is relying
00784 /// on the exact type stored to select appropriate atomic operations.
00785 ///
00786 /// \returns true if the store was successfully combined away. This indicates
00787 /// the caller must erase the store instruction. We have to let the caller erase
00788 /// the store instruction, as otherwise there is no way to signal whether it was
00789 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
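// [Editor's note: illustrative before/after, not from the original source; value
// names are made up.] The bitcast case handled below turns
//
//   %v = bitcast i32 %x to float
//   store float %v, float* %p
//
// into
//
//   %p.cast = bitcast float* %p to i32*
//   store i32 %x, i32* %p.cast
//
// storing %x directly in its original type.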
00790 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
00791   // FIXME: We could probably with some care handle both volatile and atomic
00792   // stores here but it isn't clear that this is important.
00793   if (!SI.isSimple())
00794     return false;
00795 
00796   Value *V = SI.getValueOperand();
00797 
00798   // Fold away bit casts of the stored value by storing the original type.
00799   if (auto *BC = dyn_cast<BitCastInst>(V)) {
00800     V = BC->getOperand(0);
00801     combineStoreToNewValue(IC, SI, V);
00802     return true;
00803   }
00804 
00805   // FIXME: We should also canonicalize stores of vectors when their elements are
00806   // cast to other types.
00807   return false;
00808 }
00809 
00810 /// equivalentAddressValues - Test if A and B will obviously have the same
00811 /// value. This includes recognizing that %t0 and %t1 will have the same
00812 /// value in code like this:
00813 ///   %t0 = getelementptr \@a, 0, 3
00814 ///   store i32 0, i32* %t0
00815 ///   %t1 = getelementptr \@a, 0, 3
00816 ///   %t2 = load i32* %t1
00817 ///
00818 static bool equivalentAddressValues(Value *A, Value *B) {
00819   // Test if the values are trivially equivalent.
00820   if (A == B) return true;
00821 
00822   // Test if the values come from identical arithmetic instructions.
00823   // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
00824   // it's only used to compare two uses within the same basic block, which
00825   // means that they'll always either have the same value or one of them
00826   // will have an undefined value.
00827   if (isa<BinaryOperator>(A) ||
00828       isa<CastInst>(A) ||
00829       isa<PHINode>(A) ||
00830       isa<GetElementPtrInst>(A))
00831     if (Instruction *BI = dyn_cast<Instruction>(B))
00832       if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
00833         return true;
00834 
00835   // Otherwise they may not be equivalent.
00836   return false;
00837 }
00838 
00839 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
00840   Value *Val = SI.getOperand(0);
00841   Value *Ptr = SI.getOperand(1);
00842 
00843   // Try to canonicalize the stored type.
00844   if (combineStoreToValueType(*this, SI))
00845     return EraseInstFromFunction(SI);
00846 
00847   // Attempt to improve the alignment.
00848   if (DL) {
00849     unsigned KnownAlign = getOrEnforceKnownAlignment(
00850         Ptr, DL->getPrefTypeAlignment(Val->getType()), DL, AC, &SI, DT);
00851     unsigned StoreAlign = SI.getAlignment();
00852     unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
00853       DL->getABITypeAlignment(Val->getType());
00854 
00855     if (KnownAlign > EffectiveStoreAlign)
00856       SI.setAlignment(KnownAlign);
00857     else if (StoreAlign == 0)
00858       SI.setAlignment(EffectiveStoreAlign);
00859   }
00860 
00861   // Replace GEP indices if possible.
00862   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
00863       Worklist.Add(NewGEPI);
00864       return &SI;
00865   }
00866 
00867   // Don't hack volatile/atomic stores.
00868   // FIXME: Some bits are legal for atomic stores; needs refactoring.
00869   if (!SI.isSimple()) return nullptr;
00870 
00871   // If the RHS is an alloca with a single use, zapify the store, making the
00872   // alloca dead.
00873   if (Ptr->hasOneUse()) {
00874     if (isa<AllocaInst>(Ptr))
00875       return EraseInstFromFunction(SI);
00876     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
00877       if (isa<AllocaInst>(GEP->getOperand(0))) {
00878         if (GEP->getOperand(0)->hasOneUse())
00879           return EraseInstFromFunction(SI);
00880       }
00881     }
00882   }
00883 
00884   // Do really simple DSE, to catch cases where there are several consecutive
00885   // stores to the same location, separated by a few arithmetic operations. This
00886   // situation often occurs with bitfield accesses.
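  // [Editor's note: illustrative, not from the original source; value names are
  // made up.]
  //
  //   store i32 %a, i32* %p
  //   ; ... a few instructions that don't touch memory ...
  //   store i32 %b, i32* %p     ; the first store is dead and is erased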
00887   BasicBlock::iterator BBI = &SI;
00888   for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
00889        --ScanInsts) {
00890     --BBI;
00891     // Don't count debug info directives, lest they affect codegen, and skip
00892     // pointer-to-pointer bitcasts, which are NOPs.
00893     if (isa<DbgInfoIntrinsic>(BBI) ||
00894         (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
00895       ScanInsts++;
00896       continue;
00897     }
00898 
00899     if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
00900       // Prev store isn't volatile, and stores to the same location?
00901       if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
00902                                                         SI.getOperand(1))) {
00903         ++NumDeadStore;
00904         ++BBI;
00905         EraseInstFromFunction(*PrevSI);
00906         continue;
00907       }
00908       break;
00909     }
00910 
00911     // If this is a load, we have to stop.  However, if the loaded value is from
00912     // the pointer we're storing to and is the value being stored,
00913     // then *this* store is dead (X = load P; store X -> P).
00914     if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
00915       if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
00916           LI->isSimple())
00917         return EraseInstFromFunction(SI);
00918 
00919       // Otherwise, this is a load from some other location.  Stores before it
00920       // may not be dead.
00921       break;
00922     }
00923 
00924     // Don't skip over loads or things that can modify memory.
00925     if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
00926       break;
00927   }
00928 
00929   // store X, null    -> turns into 'unreachable' in SimplifyCFG
00930   if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
00931     if (!isa<UndefValue>(Val)) {
00932       SI.setOperand(0, UndefValue::get(Val->getType()));
00933       if (Instruction *U = dyn_cast<Instruction>(Val))
00934         Worklist.Add(U);  // Dropped a use.
00935     }
00936     return nullptr;  // Do not modify these!
00937   }
00938 
00939   // store undef, Ptr -> noop
00940   if (isa<UndefValue>(Val))
00941     return EraseInstFromFunction(SI);
00942 
00943   // If this store is the last instruction in the basic block (possibly
00944   // excepting debug info instructions), and if the block ends with an
00945   // unconditional branch, try to move it to the successor block.
00946   BBI = &SI;
00947   do {
00948     ++BBI;
00949   } while (isa<DbgInfoIntrinsic>(BBI) ||
00950            (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
00951   if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
00952     if (BI->isUnconditional())
00953       if (SimplifyStoreAtEndOfBlock(SI))
00954         return nullptr;  // xform done!
00955 
00956   return nullptr;
00957 }
00958 
00959 /// SimplifyStoreAtEndOfBlock - Turn things like:
00960 ///   if () { *P = v1; } else { *P = v2 }
00961 /// into a phi node with a store in the successor.
00962 ///
00963 /// Simplify things like:
00964 ///   *P = v1; if () { *P = v2; }
00965 /// into a phi node with a store in the successor.
00966 ///
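// [Editor's note: illustrative result, not from the original source; block and
// value names other than "storemerge" are made up.] After the transform, the
// common successor contains
//
//   %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
//   store i32 %storemerge, i32* %P
//
// and both original stores are erased.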
00967 bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
00968   BasicBlock *StoreBB = SI.getParent();
00969 
00970   // Check to see if the successor block has exactly two incoming edges.  If
00971   // so, see if the other predecessor contains a store to the same location.
00972   // If so, insert a PHI node (if needed) and move the stores down.
00973   BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
00974 
00975   // Determine whether Dest has exactly two predecessors and, if so, compute
00976   // the other predecessor.
00977   pred_iterator PI = pred_begin(DestBB);
00978   BasicBlock *P = *PI;
00979   BasicBlock *OtherBB = nullptr;
00980 
00981   if (P != StoreBB)
00982     OtherBB = P;
00983 
00984   if (++PI == pred_end(DestBB))
00985     return false;
00986 
00987   P = *PI;
00988   if (P != StoreBB) {
00989     if (OtherBB)
00990       return false;
00991     OtherBB = P;
00992   }
00993   if (++PI != pred_end(DestBB))
00994     return false;
00995 
00996   // Bail out if the relevant blocks aren't all distinct (this can happen,
00997   // for example, if SI is in an infinite loop).
00998   if (StoreBB == DestBB || OtherBB == DestBB)
00999     return false;
01000 
01001   // Verify that the other block ends in a branch and is not otherwise empty.
01002   BasicBlock::iterator BBI = OtherBB->getTerminator();
01003   BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
01004   if (!OtherBr || BBI == OtherBB->begin())
01005     return false;
01006 
01007   // If the other block ends in an unconditional branch, check for the 'if then
01008   // else' case.  There is an instruction before the branch.
01009   StoreInst *OtherStore = nullptr;
01010   if (OtherBr->isUnconditional()) {
01011     --BBI;
01012     // Skip over debugging info.
01013     while (isa<DbgInfoIntrinsic>(BBI) ||
01014            (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
01015       if (BBI==OtherBB->begin())
01016         return false;
01017       --BBI;
01018     }
01019     // If this isn't a store, isn't a store to the same location, or is not the
01020     // right kind of store, bail out.
01021     OtherStore = dyn_cast<StoreInst>(BBI);
01022     if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
01023         !SI.isSameOperationAs(OtherStore))
01024       return false;
01025   } else {
01026     // Otherwise, the other block ended with a conditional branch. If one of the
01027     // destinations is StoreBB, then we have the if/then case.
01028     if (OtherBr->getSuccessor(0) != StoreBB &&
01029         OtherBr->getSuccessor(1) != StoreBB)
01030       return false;
01031 
01032     // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
01033     // if/then triangle.  See if there is a store to the same ptr as SI that
01034     // lives in OtherBB.
01035     for (;; --BBI) {
01036       // Check to see if we find the matching store.
01037       if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
01038         if (OtherStore->getOperand(1) != SI.getOperand(1) ||
01039             !SI.isSameOperationAs(OtherStore))
01040           return false;
01041         break;
01042       }
01043       // If we find something that may be using or overwriting the stored
01044       // value, or if we run out of instructions, we can't do the xform.
01045       if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
01046           BBI == OtherBB->begin())
01047         return false;
01048     }
01049 
01050     // In order to eliminate the store in OtherBr, we have to
01051     // make sure nothing reads or overwrites the stored value in
01052     // StoreBB.
01053     for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
01054       // FIXME: This should really be AA driven.
01055       if (I->mayReadFromMemory() || I->mayWriteToMemory())
01056         return false;
01057     }
01058   }
01059 
01060   // Insert a PHI node now if we need it.
01061   Value *MergedVal = OtherStore->getOperand(0);
01062   if (MergedVal != SI.getOperand(0)) {
01063     PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
01064     PN->addIncoming(SI.getOperand(0), SI.getParent());
01065     PN->addIncoming(OtherStore->getOperand(0), OtherBB);
01066     MergedVal = InsertNewInstBefore(PN, DestBB->front());
01067   }
01068 
01069   // Advance to a place where it is safe to insert the new store and
01070   // insert it.
01071   BBI = DestBB->getFirstInsertionPt();
01072   StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
01073                                    SI.isVolatile(),
01074                                    SI.getAlignment(),
01075                                    SI.getOrdering(),
01076                                    SI.getSynchScope());
01077   InsertNewInstBefore(NewSI, *BBI);
01078   NewSI->setDebugLoc(OtherStore->getDebugLoc());
01079 
01080   // If the two stores had AA tags, merge them.
01081   AAMDNodes AATags;
01082   SI.getAAMetadata(AATags);
01083   if (AATags) {
01084     OtherStore->getAAMetadata(AATags, /* Merge = */ true);
01085     NewSI->setAAMetadata(AATags);
01086   }
01087 
01088   // Nuke the old stores.
01089   EraseInstFromFunction(SI);
01090   EraseInstFromFunction(*OtherStore);
01091   return true;
01092 }