InstCombineLoadStoreAlloca.cpp
00001 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the visit functions for load, store and alloca.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "InstCombineInternal.h"
00015 #include "llvm/ADT/Statistic.h"
00016 #include "llvm/Analysis/Loads.h"
00017 #include "llvm/IR/DataLayout.h"
00018 #include "llvm/IR/LLVMContext.h"
00019 #include "llvm/IR/IntrinsicInst.h"
00020 #include "llvm/IR/MDBuilder.h"
00021 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
00022 #include "llvm/Transforms/Utils/Local.h"
00023 using namespace llvm;
00024 
00025 #define DEBUG_TYPE "instcombine"
00026 
00027 STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
00028 STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
00029 
00030 /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
00031 /// some part of a constant global variable.  This intentionally only accepts
00032 /// constant expressions because we can't rewrite arbitrary instructions.
00033 static bool pointsToConstantGlobal(Value *V) {
00034   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
00035     return GV->isConstant();
00036 
00037   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
00038     if (CE->getOpcode() == Instruction::BitCast ||
00039         CE->getOpcode() == Instruction::AddrSpaceCast ||
00040         CE->getOpcode() == Instruction::GetElementPtr)
00041       return pointsToConstantGlobal(CE->getOperand(0));
00042   }
00043   return false;
00044 }
00045 
00046 /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
00047 /// pointer to an alloca.  Ignore any reads of the pointer, return false if we
00048 /// see any stores or other unknown uses.  If we see pointer arithmetic, keep
00049 /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
00050 /// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
00051 /// the alloca, and if the source pointer is a pointer to a constant global, we
00052 /// can optimize this.
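// A sketch of the accepted pattern (hypothetical IR, not from the test suite):
//   @G   = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
//   %buf = alloca [4 x i32]
//   %p   = bitcast [4 x i32]* %buf to i8*
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p,
//          i8* bitcast ([4 x i32]* @G to i8*), i64 16, i32 4, i1 false)
//   ; ... only loads of %buf follow ...
// The memcpy is the single copy and every other use is a read, so the alloca
// can later be replaced by @G itself.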
00053 static bool
00054 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
00055                                SmallVectorImpl<Instruction *> &ToDelete) {
00056   // We track lifetime intrinsics as we encounter them.  If we decide to go
00057   // ahead and replace the value with the global, this lets the caller quickly
00058   // eliminate the markers.
00059 
00060   SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
00061   ValuesToInspect.push_back(std::make_pair(V, false));
00062   while (!ValuesToInspect.empty()) {
00063     auto ValuePair = ValuesToInspect.pop_back_val();
00064     const bool IsOffset = ValuePair.second;
00065     for (auto &U : ValuePair.first->uses()) {
00066       Instruction *I = cast<Instruction>(U.getUser());
00067 
00068       if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
00069         // Ignore non-volatile loads, they are always ok.
00070         if (!LI->isSimple()) return false;
00071         continue;
00072       }
00073 
00074       if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
00075         // If uses of the bitcast are ok, we are ok.
00076         ValuesToInspect.push_back(std::make_pair(I, IsOffset));
00077         continue;
00078       }
00079       if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
00080         // If the GEP has all zero indices, it doesn't offset the pointer;
00081         // otherwise it does.
00082         ValuesToInspect.push_back(
00083             std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
00084         continue;
00085       }
00086 
00087       if (auto CS = CallSite(I)) {
00088         // If this is the function being called then we treat it like a load and
00089         // ignore it.
00090         if (CS.isCallee(&U))
00091           continue;
00092 
00093         // Inalloca arguments are clobbered by the call.
00094         unsigned ArgNo = CS.getArgumentNo(&U);
00095         if (CS.isInAllocaArgument(ArgNo))
00096           return false;
00097 
00098         // If this is a readonly/readnone call site, then we know it is just a
00099         // load (but one that potentially returns the value itself), so we can
00100         // ignore it if we know that the value isn't captured.
00101         if (CS.onlyReadsMemory() &&
00102             (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
00103           continue;
00104 
00105         // If this is being passed as a byval argument, the caller is making a
00106         // copy, so it is only a read of the alloca.
00107         if (CS.isByValArgument(ArgNo))
00108           continue;
00109       }
00110 
00111       // Lifetime intrinsics can be handled by the caller.
00112       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
00113         if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
00114             II->getIntrinsicID() == Intrinsic::lifetime_end) {
00115           assert(II->use_empty() && "Lifetime markers have no result to use!");
00116           ToDelete.push_back(II);
00117           continue;
00118         }
00119       }
00120 
00121       // If this isn't our memcpy/memmove, reject it as something we can't
00122       // handle.
00123       MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
00124       if (!MI)
00125         return false;
00126 
00127       // If the transfer is using the alloca as a source of the transfer, then
00128       // ignore it since it is a load (unless the transfer is volatile).
00129       if (U.getOperandNo() == 1) {
00130         if (MI->isVolatile()) return false;
00131         continue;
00132       }
00133 
00134       // If we already have seen a copy, reject the second one.
00135       if (TheCopy) return false;
00136 
00137       // If the pointer has been offset from the start of the alloca, we can't
00138       // safely handle this.
00139       if (IsOffset) return false;
00140 
00141       // If the memintrinsic isn't using the alloca as the dest, reject it.
00142       if (U.getOperandNo() != 0) return false;
00143 
00144       // If the source of the memcpy/move is not a constant global, reject it.
00145       if (!pointsToConstantGlobal(MI->getSource()))
00146         return false;
00147 
00148       // Otherwise, the transform is safe.  Remember the copy instruction.
00149       TheCopy = MI;
00150     }
00151   }
00152   return true;
00153 }
00154 
00155 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
00156 /// modified by a copy from a constant global.  If we can prove this, we can
00157 /// replace any uses of the alloca with uses of the global directly.
00158 static MemTransferInst *
00159 isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
00160                                SmallVectorImpl<Instruction *> &ToDelete) {
00161   MemTransferInst *TheCopy = nullptr;
00162   if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
00163     return TheCopy;
00164   return nullptr;
00165 }
00166 
00167 static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
00168   // Check for array size of 1 (scalar allocation).
00169   if (!AI.isArrayAllocation()) {
00170     // i32 1 is the canonical array size for scalar allocations.
00171     if (AI.getArraySize()->getType()->isIntegerTy(32))
00172       return nullptr;
00173 
00174     // Canonicalize it.
00175     Value *V = IC.Builder->getInt32(1);
00176     AI.setOperand(0, V);
00177     return &AI;
00178   }
00179 
00180   // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
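  // For example (illustrative only):
  //   %a = alloca i32, i32 4
  // becomes
  //   %a = alloca [4 x i32]
  //   %a.sub = getelementptr inbounds [4 x i32]* %a, i64 0, i64 0
  // and all users of the original alloca are pointed at %a.sub.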
00181   if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
00182     Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
00183     AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
00184     New->setAlignment(AI.getAlignment());
00185 
00186     // Scan to the end of the allocation instructions, to skip over a block of
00187     // allocas if possible...also skip interleaved debug info
00188     //
00189     BasicBlock::iterator It = New;
00190     while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
00191       ++It;
00192 
00193     // Now that It is pointing to the first non-allocation-inst in the block,
00194     // insert our getelementptr instruction...
00195     //
00196     Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
00197     Value *NullIdx = Constant::getNullValue(IdxTy);
00198     Value *Idx[2] = {NullIdx, NullIdx};
00199     Instruction *GEP =
00200         GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
00201     IC.InsertNewInstBefore(GEP, *It);
00202 
00203     // Now make everything use the getelementptr instead of the original
00204     // allocation.
00205     return IC.ReplaceInstUsesWith(AI, GEP);
00206   }
00207 
00208   if (isa<UndefValue>(AI.getArraySize()))
00209     return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
00210 
00211   // Ensure that the alloca array size argument has type intptr_t, so that
00212   // any casting is exposed early.
00213   Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
00214   if (AI.getArraySize()->getType() != IntPtrTy) {
00215     Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
00216     AI.setOperand(0, V);
00217     return &AI;
00218   }
00219 
00220   return nullptr;
00221 }
00222 
00223 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
00224   if (auto *I = simplifyAllocaArraySize(*this, AI))
00225     return I;
00226 
00227   if (AI.getAllocatedType()->isSized()) {
00228     // If the alignment is 0 (unspecified), assign it the preferred alignment.
00229     if (AI.getAlignment() == 0)
00230       AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
00231 
00232     // Move all allocas of zero byte objects to the entry block and merge them
00233     // together.  Note that we only do this for allocas, because malloc should
00234     // allocate and return a unique pointer, even for a zero byte allocation.
00235     if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
00236       // For a zero sized alloca there is no point in doing an array allocation.
00237       // This is helpful if the array size is a complicated expression not used
00238       // elsewhere.
00239       if (AI.isArrayAllocation()) {
00240         AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
00241         return &AI;
00242       }
00243 
00244       // Get the first instruction in the entry block.
00245       BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
00246       Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
00247       if (FirstInst != &AI) {
00248         // If the entry block doesn't start with a zero-size alloca then move
00249         // this one to the start of the entry block.  There is no problem with
00250         // dominance as the array size was forced to a constant earlier already.
00251         AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
00252         if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
00253             DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
00254           AI.moveBefore(FirstInst);
00255           return &AI;
00256         }
00257 
00258         // If the alignment of the entry block alloca is 0 (unspecified),
00259         // assign it the preferred alignment.
00260         if (EntryAI->getAlignment() == 0)
00261           EntryAI->setAlignment(
00262               DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
00263         // Replace this zero-sized alloca with the one at the start of the entry
00264         // block after ensuring that the address will be aligned enough for both
00265         // types.
00266         unsigned MaxAlign = std::max(EntryAI->getAlignment(),
00267                                      AI.getAlignment());
00268         EntryAI->setAlignment(MaxAlign);
00269         if (AI.getType() != EntryAI->getType())
00270           return new BitCastInst(EntryAI, AI.getType());
00271         return ReplaceInstUsesWith(AI, EntryAI);
00272       }
00273     }
00274   }
00275 
00276   if (AI.getAlignment()) {
00277     // Check to see if this allocation is only modified by a memcpy/memmove from
00278     // a constant global whose alignment is equal to or exceeds that of the
00279     // allocation.  If this is the case, we can change all users to use
00280     // the constant global instead.  This is commonly produced by the CFE by
00281     // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
00282     // is only subsequently read.
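    // Illustrative IR for the C example above (names are hypothetical):
    //   %A = alloca [10 x i32], align 4
    //   %0 = bitcast [10 x i32]* %A to i8*
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0,
    //          i8* bitcast ([10 x i32]* @foo.A to i8*), i64 40, i32 4, i1 false)
    // If %A is otherwise only read, its uses can be redirected to @foo.A.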
00283     SmallVector<Instruction *, 4> ToDelete;
00284     if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
00285       unsigned SourceAlign = getOrEnforceKnownAlignment(
00286           Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
00287       if (AI.getAlignment() <= SourceAlign) {
00288         DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
00289         DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
00290         for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
00291           EraseInstFromFunction(*ToDelete[i]);
00292         Constant *TheSrc = cast<Constant>(Copy->getSource());
00293         Constant *Cast
00294           = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
00295         Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
00296         EraseInstFromFunction(*Copy);
00297         ++NumGlobalCopies;
00298         return NewI;
00299       }
00300     }
00301   }
00302 
00303   // At last, use the generic allocation site handler to aggressively remove
00304   // unused allocas.
00305   return visitAllocSite(AI);
00306 }
00307 
00308 /// \brief Helper to combine a load to a new type.
00309 ///
00310 /// This just does the work of combining a load to a new type. It handles
00311 /// metadata, etc., and returns the new instruction. The \c NewTy should be the
00312 /// loaded *value* type. This will convert it to a pointer, cast the operand to
00313 /// that pointer type, load it, etc.
00314 ///
00315 /// Note that this will create all of the instructions with whatever insert
00316 /// point the \c InstCombiner currently is using.
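// Roughly, for NewTy == float this turns (illustrative IR)
//   %v = load i32* %ptr
// into
//   %cast = bitcast i32* %ptr to float*
//   %v    = load float* %cast
// with the original load's metadata re-attached where it still applies.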
00317 static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
00318                                       const Twine &Suffix = "") {
00319   Value *Ptr = LI.getPointerOperand();
00320   unsigned AS = LI.getPointerAddressSpace();
00321   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
00322   LI.getAllMetadata(MD);
00323 
00324   LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
00325       IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
00326       LI.getAlignment(), LI.getName() + Suffix);
00327   MDBuilder MDB(NewLoad->getContext());
00328   for (const auto &MDPair : MD) {
00329     unsigned ID = MDPair.first;
00330     MDNode *N = MDPair.second;
00331     // Note, essentially every kind of metadata should be preserved here! This
00332     // routine is supposed to clone a load instruction changing *only its type*.
00333     // The only metadata it makes sense to drop is metadata which is invalidated
00334     // when the pointer type changes. This should essentially never be the case
00335     // in LLVM, but we explicitly switch over only known metadata to be
00336     // conservatively correct. If you are adding metadata to LLVM which pertains
00337     // to loads, you almost certainly want to add it here.
00338     switch (ID) {
00339     case LLVMContext::MD_dbg:
00340     case LLVMContext::MD_tbaa:
00341     case LLVMContext::MD_prof:
00342     case LLVMContext::MD_fpmath:
00343     case LLVMContext::MD_tbaa_struct:
00344     case LLVMContext::MD_invariant_load:
00345     case LLVMContext::MD_alias_scope:
00346     case LLVMContext::MD_noalias:
00347     case LLVMContext::MD_nontemporal:
00348     case LLVMContext::MD_mem_parallel_loop_access:
00349       // All of these directly apply.
00350       NewLoad->setMetadata(ID, N);
00351       break;
00352 
00353     case LLVMContext::MD_nonnull:
00354       // This only directly applies if the new type is also a pointer.
00355       if (NewTy->isPointerTy()) {
00356         NewLoad->setMetadata(ID, N);
00357         break;
00358       }
00359       // If it's integral now, translate it to !range metadata.
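      // Illustrative example: a load of i8* carrying !nonnull that is being
      // rewritten as a load of i64 gets !range !{i64 1, i64 0}, i.e. every
      // value except zero (the range wraps around).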
00360       if (NewTy->isIntegerTy()) {
00361         auto *ITy = cast<IntegerType>(NewTy);
00362         auto *NullInt = ConstantExpr::getPtrToInt(
00363             ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
00364         auto *NonNullInt =
00365             ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
00366         NewLoad->setMetadata(LLVMContext::MD_range,
00367                              MDB.createRange(NonNullInt, NullInt));
00368       }
00369       break;
00370 
00371     case LLVMContext::MD_range:
00372       // FIXME: It would be nice to propagate this in some way, but the type
00373       // conversions make it hard. If the new type is a pointer, we could
00374       // translate it to !nonnull metadata.
00375       break;
00376     }
00377   }
00378   return NewLoad;
00379 }
00380 
00381 /// \brief Combine a store to a new type.
00382 ///
00383 /// Returns the newly created store instruction.
00384 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
00385   Value *Ptr = SI.getPointerOperand();
00386   unsigned AS = SI.getPointerAddressSpace();
00387   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
00388   SI.getAllMetadata(MD);
00389 
00390   StoreInst *NewStore = IC.Builder->CreateAlignedStore(
00391       V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
00392       SI.getAlignment());
00393   for (const auto &MDPair : MD) {
00394     unsigned ID = MDPair.first;
00395     MDNode *N = MDPair.second;
00396     // Note, essentially every kind of metadata should be preserved here! This
00397     // routine is supposed to clone a store instruction changing *only its
00398     // type*. The only metadata it makes sense to drop is metadata which is
00399     // invalidated when the pointer type changes. This should essentially
00400     // never be the case in LLVM, but we explicitly switch over only known
00401     // metadata to be conservatively correct. If you are adding metadata to
00402     // LLVM which pertains to stores, you almost certainly want to add it
00403     // here.
00404     switch (ID) {
00405     case LLVMContext::MD_dbg:
00406     case LLVMContext::MD_tbaa:
00407     case LLVMContext::MD_prof:
00408     case LLVMContext::MD_fpmath:
00409     case LLVMContext::MD_tbaa_struct:
00410     case LLVMContext::MD_alias_scope:
00411     case LLVMContext::MD_noalias:
00412     case LLVMContext::MD_nontemporal:
00413     case LLVMContext::MD_mem_parallel_loop_access:
00414       // All of these directly apply.
00415       NewStore->setMetadata(ID, N);
00416       break;
00417 
00418     case LLVMContext::MD_invariant_load:
00419     case LLVMContext::MD_nonnull:
00420     case LLVMContext::MD_range:
00421       // These don't apply for stores.
00422       break;
00423     }
00424   }
00425 
00426   return NewStore;
00427 }
00428 
00429 /// \brief Combine loads to match the type of the value their uses expect,
00430 /// looking through intervening bitcasts.
00431 ///
00432 /// The core idea here is that if the result of a load is used in an operation,
00433 /// we should load the type most conducive to that operation. For example, when
00434 /// loading an integer and converting that immediately to a pointer, we should
00435 /// instead directly load a pointer.
00436 ///
00437 /// However, this routine must never change the width of a load or the number of
00438 /// loads as that would introduce a semantic change. This combine is expected to
00439 /// be a semantic no-op which just allows loads to more closely model the types
00440 /// of their consuming operations.
00441 ///
00442 /// Currently, we also refuse to change the precise type used for an atomic load
00443 /// or a volatile load. This is debatable, and might be reasonable to change
00444 /// later. However, it is risky in case some backend or other part of LLVM is
00445 /// relying on the exact type loaded to select appropriate atomic operations.
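// A typical case (illustrative IR): the pair
//   %x = load i64* %p
//   %q = inttoptr i64 %x to i8*
// is better modelled as
//   %pp = bitcast i64* %p to i8**
//   %q  = load i8** %pp
// provided i64 and pointers have the same width, so no bits change.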
00446 static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
00447   // FIXME: We could probably with some care handle both volatile and atomic
00448   // loads here but it isn't clear that this is important.
00449   if (!LI.isSimple())
00450     return nullptr;
00451 
00452   if (LI.use_empty())
00453     return nullptr;
00454 
00455   Type *Ty = LI.getType();
00456   const DataLayout &DL = IC.getDataLayout();
00457 
00458   // Try to canonicalize loads which are only ever stored to operate over
00459   // integers instead of any other type. We only do this when the loaded type
00460   // is sized and has a size exactly the same as its store size and the store
00461   // size is a legal integer type.
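  // For instance (illustrative, hypothetical names): a float that is loaded
  // and then only stored,
  //   %f = load float* %src
  //   store float %f, float* %dst
  // can move its bits through an integer instead:
  //   %i = load i32* %src.cast
  //   store i32 %i, i32* %dst.cast
  // where %src.cast and %dst.cast are bitcasts of the original pointers.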
00462   if (!Ty->isIntegerTy() && Ty->isSized() &&
00463       DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
00464       DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
00465     if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
00466           auto *SI = dyn_cast<StoreInst>(U);
00467           return SI && SI->getPointerOperand() != &LI;
00468         })) {
00469       LoadInst *NewLoad = combineLoadToNewType(
00470           IC, LI,
00471           Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
00472       // Replace all the stores with stores of the newly loaded value.
00473       for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
00474         auto *SI = cast<StoreInst>(*UI++);
00475         IC.Builder->SetInsertPoint(SI);
00476         combineStoreToNewValue(IC, *SI, NewLoad);
00477         IC.EraseInstFromFunction(*SI);
00478       }
00479       assert(LI.use_empty() && "Failed to remove all users of the load!");
00480       // Return the old load so the combiner can delete it safely.
00481       return &LI;
00482     }
00483   }
00484 
00485   // Fold away bit casts of the loaded value by loading the desired type.
00486   // We can do this for BitCastInsts as well as casts from and to pointer types,
00487   // as long as those are noops (i.e., the source or dest type has the same
00488   // bitwidth as the target's pointers).
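  // Sketch (illustrative): with a single bitcast user,
  //   %x = load i32* %p
  //   %f = bitcast i32 %x to float
  // becomes
  //   %pf = bitcast i32* %p to float*
  //   %f  = load float* %pf
  // and the original load is returned so the combiner can erase it.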
00489   if (LI.hasOneUse())
00490     if (auto* CI = dyn_cast<CastInst>(LI.user_back())) {
00491       if (CI->isNoopCast(DL)) {
00492         LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
00493         CI->replaceAllUsesWith(NewLoad);
00494         IC.EraseInstFromFunction(*CI);
00495         return &LI;
00496       }
00497     }
00498 
00499   // FIXME: We should also canonicalize loads of vectors when their elements are
00500   // cast to other types.
00501   return nullptr;
00502 }
00503 
00504 static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
00505   // FIXME: We could probably with some care handle both volatile and atomic
00506   // stores here but it isn't clear that this is important.
00507   if (!LI.isSimple())
00508     return nullptr;
00509 
00510   Type *T = LI.getType();
00511   if (!T->isAggregateType())
00512     return nullptr;
00513 
00514   assert(LI.getAlignment() && "Alignment must be set at this point");
00515 
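  // Illustrative example of the single-element unpack below (hypothetical
  // names; combineLoadToNewType performs the pointer cast):
  //   %agg = load { i32 }* %p
  // becomes
  //   %elt = load i32* %p.cast
  //   %agg = insertvalue { i32 } undef, i32 %elt, 0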
00516   if (auto *ST = dyn_cast<StructType>(T)) {
00517     // If the struct only has one element, we unpack.
00518     if (ST->getNumElements() == 1) {
00519       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
00520                                                ".unpack");
00521       return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
00522         UndefValue::get(T), NewLoad, 0, LI.getName()));
00523     }
00524   }
00525 
00526   if (auto *AT = dyn_cast<ArrayType>(T)) {
00527     // If the array only has one element, we unpack.
00528     if (AT->getNumElements() == 1) {
00529       LoadInst *NewLoad = combineLoadToNewType(IC, LI, AT->getElementType(),
00530                                                ".unpack");
00531       return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
00532         UndefValue::get(T), NewLoad, 0, LI.getName()));
00533     }
00534   }
00535 
00536   return nullptr;
00537 }
00538 
00539 // Return true if we can determine that all possible objects pointed to by the
00540 // provided pointer value are not only dereferenceable but also definitively of
00541 // size less than or equal to the provided maximum; otherwise return false.
00542 // Constant globals and allocas are the kinds of objects whose size we can bound.
00543 //
00544 // FIXME: This should probably live in ValueTracking (or similar).
00545 static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
00546                                      const DataLayout &DL) {
00547   SmallPtrSet<Value *, 4> Visited;
00548   SmallVector<Value *, 4> Worklist(1, V);
00549 
00550   do {
00551     Value *P = Worklist.pop_back_val();
00552     P = P->stripPointerCasts();
00553 
00554     if (!Visited.insert(P).second)
00555       continue;
00556 
00557     if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
00558       Worklist.push_back(SI->getTrueValue());
00559       Worklist.push_back(SI->getFalseValue());
00560       continue;
00561     }
00562 
00563     if (PHINode *PN = dyn_cast<PHINode>(P)) {
00564       for (Value *IncValue : PN->incoming_values())
00565         Worklist.push_back(IncValue);
00566       continue;
00567     }
00568 
00569     if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
00570       if (GA->mayBeOverridden())
00571         return false;
00572       Worklist.push_back(GA->getAliasee());
00573       continue;
00574     }
00575 
00576     // If we know how big this object is, and it is less than MaxSize, continue
00577     // searching. Otherwise, return false.
00578     if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
00579       if (!AI->getAllocatedType()->isSized())
00580         return false;
00581 
00582       ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
00583       if (!CS)
00584         return false;
00585 
00586       uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
00587       // Make sure that, even if the multiplication below would wrap as a
00588       // uint64_t, we still do the right thing.
00589       if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
00590         return false;
00591       continue;
00592     }
00593 
00594     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
00595       if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
00596         return false;
00597 
00598       uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType());
00599       if (InitSize > MaxSize)
00600         return false;
00601       continue;
00602     }
00603 
00604     return false;
00605   } while (!Worklist.empty());
00606 
00607   return true;
00608 }
00609 
00610 // If we're indexing into an object of a known size, and the outer index is
00611 // not a constant, but having any value but zero would lead to undefined
00612 // behavior, replace it with zero.
00613 //
00614 // For example, if we have:
00615 // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
00616 // ...
00617 // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
00618 // ... = load i32* %arrayidx, align 4
00619 // Then we know that we can replace %x in the GEP with i64 0.
00620 //
00621 // FIXME: We could fold any GEP index to zero that would cause UB if it were
00622 // not zero. Currently, we only handle the first such index. Also, we could
00623 // also search through non-zero constant indices if we kept track of the
00624 // offsets those indices implied.
00625 static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
00626                                      Instruction *MemI, unsigned &Idx) {
00627   if (GEPI->getNumOperands() < 2)
00628     return false;
00629 
00630   // Find the first non-zero index of a GEP. If all indices are zero, return
00631   // one past the last index.
00632   auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
00633     unsigned I = 1;
00634     for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
00635       Value *V = GEPI->getOperand(I);
00636       if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
00637         if (CI->isZero())
00638           continue;
00639 
00640       break;
00641     }
00642 
00643     return I;
00644   };
00645 
00646   // Skip through initial 'zero' indices, and find the corresponding pointer
00647   // type. See if the next index is not a constant.
00648   Idx = FirstNZIdx(GEPI);
00649   if (Idx == GEPI->getNumOperands())
00650     return false;
00651   if (isa<Constant>(GEPI->getOperand(Idx)))
00652     return false;
00653 
00654   SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
00655   Type *AllocTy = GetElementPtrInst::getIndexedType(
00656       cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType())
00657           ->getElementType(),
00658       Ops);
00659   if (!AllocTy || !AllocTy->isSized())
00660     return false;
00661   const DataLayout &DL = IC.getDataLayout();
00662   uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
00663 
00664   // If there are more indices after the one we might replace with a zero, make
00665   // sure they're all non-negative. If any of them are negative, the overall
00666   // address being computed might be before the base address determined by the
00667   // first non-zero index.
00668   auto IsAllNonNegative = [&]() {
00669     for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
00670       bool KnownNonNegative, KnownNegative;
00671       IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
00672                         KnownNegative, 0, MemI);
00673       if (KnownNonNegative)
00674         continue;
00675       return false;
00676     }
00677 
00678     return true;
00679   };
00680 
00681   // FIXME: If the GEP is not inbounds, and there are extra indices after the
00682   // one we'll replace, those could cause the address computation to wrap
00683   // (rendering the IsAllNonNegative() check below insufficient). We can do
00684   // better, ignoring zero indicies (and other indicies we can prove small
00685   // better, ignoring zero indices (and other indices we can prove small
00686   if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
00687     return false;
00688 
00689   // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
00690   // also known to be dereferenceable.
00691   return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
00692          IsAllNonNegative();
00693 }
00694 
00695 // If we're indexing into an object with a variable index for the memory
00696 // access, but the object has only one element, we can assume that the index
00697 // will always be zero. If we replace the GEP, return it.
00698 template <typename T>
00699 static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
00700                                           T &MemI) {
00701   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
00702     unsigned Idx;
00703     if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
00704       Instruction *NewGEPI = GEPI->clone();
00705       NewGEPI->setOperand(Idx,
00706         ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
00707       NewGEPI->insertBefore(GEPI);
00708       MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
00709       return NewGEPI;
00710     }
00711   }
00712 
00713   return nullptr;
00714 }
00715 
00716 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
00717   Value *Op = LI.getOperand(0);
00718 
00719   // Try to canonicalize the loaded type.
00720   if (Instruction *Res = combineLoadToOperationType(*this, LI))
00721     return Res;
00722 
00723   // Attempt to improve the alignment.
00724   unsigned KnownAlign = getOrEnforceKnownAlignment(
00725       Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
00726   unsigned LoadAlign = LI.getAlignment();
00727   unsigned EffectiveLoadAlign =
00728       LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
00729 
00730   if (KnownAlign > EffectiveLoadAlign)
00731     LI.setAlignment(KnownAlign);
00732   else if (LoadAlign == 0)
00733     LI.setAlignment(EffectiveLoadAlign);
00734 
00735   // Replace GEP indices if possible.
00736   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
00737       Worklist.Add(NewGEPI);
00738       return &LI;
00739   }
00740 
00741   // None of the following transforms are legal for volatile/atomic loads.
00742   // FIXME: Some of it is okay for atomic loads; needs refactoring.
00743   if (!LI.isSimple()) return nullptr;
00744 
00745   if (Instruction *Res = unpackLoadToAggregate(*this, LI))
00746     return Res;
00747 
00748   // Do really simple store-to-load forwarding and load CSE, to catch cases
00749   // where there are several consecutive memory accesses to the same location,
00750   // separated by a few arithmetic operations.
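  // e.g. (illustrative) the load below can simply reuse %v:
  //   store i32 %v, i32* %p
  //   %a = add i32 %x, 1
  //   %r = load i32* %p        ; forwarded: %r == %v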
00751   BasicBlock::iterator BBI = &LI;
00752   if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
00753     return ReplaceInstUsesWith(
00754         LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
00755                                             LI.getName() + ".cast"));
00756 
00757   // load(gep null, ...) -> unreachable
00758   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
00759     const Value *GEPI0 = GEPI->getOperand(0);
00760     // TODO: Consider a target hook for valid address spaces for this xform.
00761     if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
00762       // Insert a new store to null instruction before the load to indicate
00763       // that this code is not reachable.  We do this instead of inserting
00764       // an unreachable instruction directly because we cannot modify the
00765       // CFG.
00766       new StoreInst(UndefValue::get(LI.getType()),
00767                     Constant::getNullValue(Op->getType()), &LI);
00768       return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
00769     }
00770   }
00771 
00772   // load null/undef -> unreachable
00773   // TODO: Consider a target hook for valid address spaces for this xform.
00774   if (isa<UndefValue>(Op) ||
00775       (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
00776     // Insert a new store to null instruction before the load to indicate that
00777     // this code is not reachable.  We do this instead of inserting an
00778     // unreachable instruction directly because we cannot modify the CFG.
00779     new StoreInst(UndefValue::get(LI.getType()),
00780                   Constant::getNullValue(Op->getType()), &LI);
00781     return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
00782   }
00783 
00784   if (Op->hasOneUse()) {
00785     // Change select and PHI nodes to select values instead of addresses: this
00786     // helps alias analysis out a lot, allows many other simplifications, and
00787     // exposes redundancy in the code.
00788     //
00789     // Note that we cannot do the transformation unless we know that the
00790     // introduced loads cannot trap!  Something like this is valid as long as
00791     // the condition is always false: load (select bool %C, int* null, int* %G),
00792     // but it would not be valid if we transformed it to load from null
00793     // unconditionally.
00794     //
00795     if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
00796       // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
00797       unsigned Align = LI.getAlignment();
00798       if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
00799           isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
00800         LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
00801                                            SI->getOperand(1)->getName()+".val");
00802         LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
00803                                            SI->getOperand(2)->getName()+".val");
00804         V1->setAlignment(Align);
00805         V2->setAlignment(Align);
00806         return SelectInst::Create(SI->getCondition(), V1, V2);
00807       }
00808 
00809       // load (select (cond, null, P)) -> load P
00810       if (isa<ConstantPointerNull>(SI->getOperand(1)) && 
00811           LI.getPointerAddressSpace() == 0) {
00812         LI.setOperand(0, SI->getOperand(2));
00813         return &LI;
00814       }
00815 
00816       // load (select (cond, P, null)) -> load P
00817       if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
00818           LI.getPointerAddressSpace() == 0) {
00819         LI.setOperand(0, SI->getOperand(1));
00820         return &LI;
00821       }
00822     }
00823   }
00824   return nullptr;
00825 }
00826 
00827 /// \brief Combine stores to match the type of value being stored.
00828 ///
00829 /// The core idea here is that the memory does not have any intrinsic type and
00830 /// where we can we should match the type of a store to the type of value being
00831 /// stored.
00832 ///
00833 /// However, this routine must never change the width of a store or the number of
00834 /// stores as that would introduce a semantic change. This combine is expected to
00835 /// be a semantic no-op which just allows stores to more closely model the types
00836 /// of their incoming values.
00837 ///
00838 /// Currently, we also refuse to change the precise type used for an atomic or
00839 /// volatile store. This is debatable, and might be reasonable to change later.
00840 /// However, it is risky in case some backend or other part of LLVM is relying
00841 /// on the exact type stored to select appropriate atomic operations.
00842 ///
00843 /// \returns true if the store was successfully combined away. This indicates
00844 /// the caller must erase the store instruction. We have to let the caller erase
00845 /// the store instruction, as otherwise there is no way to signal whether it was
00846 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
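// Sketch of the bitcast case handled below (illustrative IR):
//   %f = bitcast i32 %x to float
//   store float %f, float* %p
// becomes
//   %pi = bitcast float* %p to i32*
//   store i32 %x, i32* %pi
// leaving the now-dead bitcast of %x for later cleanup.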
00847 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
00848   // FIXME: We could probably with some care handle both volatile and atomic
00849   // stores here but it isn't clear that this is important.
00850   if (!SI.isSimple())
00851     return false;
00852 
00853   Value *V = SI.getValueOperand();
00854 
00855   // Fold away bit casts of the stored value by storing the original type.
00856   if (auto *BC = dyn_cast<BitCastInst>(V)) {
00857     V = BC->getOperand(0);
00858     combineStoreToNewValue(IC, SI, V);
00859     return true;
00860   }
00861 
00862   // FIXME: We should also canonicalize stores of vectors when their elements are
00863   // cast to other types.
00864   return false;
00865 }
00866 
00867 static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
00868   // FIXME: We could probably with some care handle both volatile and atomic
00869   // stores here but it isn't clear that this is important.
00870   if (!SI.isSimple())
00871     return false;
00872 
00873   Value *V = SI.getValueOperand();
00874   Type *T = V->getType();
00875 
00876   if (!T->isAggregateType())
00877     return false;
00878 
00879   if (auto *ST = dyn_cast<StructType>(T)) {
00880     // If the struct only has one element, we unpack.
00881     if (ST->getNumElements() == 1) {
00882       V = IC.Builder->CreateExtractValue(V, 0);
00883       combineStoreToNewValue(IC, SI, V);
00884       return true;
00885     }
00886   }
00887 
00888   if (auto *AT = dyn_cast<ArrayType>(T)) {
00889     // If the array only has one element, we unpack.
00890     if (AT->getNumElements() == 1) {
00891       V = IC.Builder->CreateExtractValue(V, 0);
00892       combineStoreToNewValue(IC, SI, V);
00893       return true;
00894     }
00895   }
00896 
00897   return false;
00898 }
00899 
00900 /// equivalentAddressValues - Test if A and B will obviously have the same
00901 /// value. This includes recognizing that %t0 and %t1 will have the same
00902 /// value in code like this:
00903 ///   %t0 = getelementptr \@a, 0, 3
00904 ///   store i32 0, i32* %t0
00905 ///   %t1 = getelementptr \@a, 0, 3
00906 ///   %t2 = load i32* %t1
00907 ///
00908 static bool equivalentAddressValues(Value *A, Value *B) {
00909   // Test if the values are trivially equivalent.
00910   if (A == B) return true;
00911 
00912   // Test if the values come from identical arithmetic instructions.
00913   // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
00914   // it's only used to compare two uses within the same basic block, which
00915   // means that they'll always either have the same value or one of them
00916   // will have an undefined value.
00917   if (isa<BinaryOperator>(A) ||
00918       isa<CastInst>(A) ||
00919       isa<PHINode>(A) ||
00920       isa<GetElementPtrInst>(A))
00921     if (Instruction *BI = dyn_cast<Instruction>(B))
00922       if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
00923         return true;
00924 
00925   // Otherwise they may not be equivalent.
00926   return false;
00927 }
00928 
00929 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
00930   Value *Val = SI.getOperand(0);
00931   Value *Ptr = SI.getOperand(1);
00932 
00933   // Try to canonicalize the stored type.
00934   if (combineStoreToValueType(*this, SI))
00935     return EraseInstFromFunction(SI);
00936 
00937   // Attempt to improve the alignment.
00938   unsigned KnownAlign = getOrEnforceKnownAlignment(
00939       Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
00940   unsigned StoreAlign = SI.getAlignment();
00941   unsigned EffectiveStoreAlign =
00942       StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
00943 
00944   if (KnownAlign > EffectiveStoreAlign)
00945     SI.setAlignment(KnownAlign);
00946   else if (StoreAlign == 0)
00947     SI.setAlignment(EffectiveStoreAlign);
00948 
00949   // Try to canonicalize the stored type.
00950   if (unpackStoreToAggregate(*this, SI))
00951     return EraseInstFromFunction(SI);
00952 
00953   // Replace GEP indices if possible.
00954   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
00955       Worklist.Add(NewGEPI);
00956       return &SI;
00957   }
00958 
00959   // Don't hack volatile/atomic stores.
00960   // FIXME: Some bits are legal for atomic stores; needs refactoring.
00961   if (!SI.isSimple()) return nullptr;
00962 
00963   // If the RHS is an alloca with a single use, zapify the store, making the
00964   // alloca dead.
00965   if (Ptr->hasOneUse()) {
00966     if (isa<AllocaInst>(Ptr))
00967       return EraseInstFromFunction(SI);
00968     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
00969       if (isa<AllocaInst>(GEP->getOperand(0))) {
00970         if (GEP->getOperand(0)->hasOneUse())
00971           return EraseInstFromFunction(SI);
00972       }
00973     }
00974   }
00975 
00976   // Do really simple DSE, to catch cases where there are several consecutive
00977   // stores to the same location, separated by a few arithmetic operations. This
00978   // situation often occurs with bitfield accesses.
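  // Illustrative example: in
  //   store i32 %a, i32* %p
  //   %b = or i32 %a, 255
  //   store i32 %b, i32* %p
  // the first store is dead and is removed by the scan below.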
00979   BasicBlock::iterator BBI = &SI;
00980   for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
00981        --ScanInsts) {
00982     --BBI;
00983     // Don't count debug info directives, lest they affect codegen,
00984     // and we skip pointer-to-pointer bitcasts, which are NOPs.
00985     if (isa<DbgInfoIntrinsic>(BBI) ||
00986         (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
00987       ScanInsts++;
00988       continue;
00989     }
00990 
00991     if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
00992       // Prev store isn't volatile, and stores to the same location?
00993       if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
00994                                                         SI.getOperand(1))) {
00995         ++NumDeadStore;
00996         ++BBI;
00997         EraseInstFromFunction(*PrevSI);
00998         continue;
00999       }
01000       break;
01001     }
01002 
01003     // If this is a load, we have to stop.  However, if the loaded value is from
01004     // the pointer we're loading and is producing the pointer we're storing,
01005     // then *this* store is dead (X = load P; store X -> P).
01006     if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
01007       if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
01008           LI->isSimple())
01009         return EraseInstFromFunction(SI);
01010 
01011       // Otherwise, this is a load from some other location.  Stores before it
01012       // may not be dead.
01013       break;
01014     }
01015 
01016     // Don't skip over loads or things that can modify memory.
01017     if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
01018       break;
01019   }
01020 
01021   // store X, null    -> turns into 'unreachable' in SimplifyCFG
01022   if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
01023     if (!isa<UndefValue>(Val)) {
01024       SI.setOperand(0, UndefValue::get(Val->getType()));
01025       if (Instruction *U = dyn_cast<Instruction>(Val))
01026         Worklist.Add(U);  // Dropped a use.
01027     }
01028     return nullptr;  // Do not modify these!
01029   }
01030 
01031   // store undef, Ptr -> noop
01032   if (isa<UndefValue>(Val))
01033     return EraseInstFromFunction(SI);
01034 
01035   // If this store is the last instruction in the basic block (possibly
01036   // excepting debug info instructions), and if the block ends with an
01037   // unconditional branch, try to move it to the successor block.
01038   BBI = &SI;
01039   do {
01040     ++BBI;
01041   } while (isa<DbgInfoIntrinsic>(BBI) ||
01042            (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
01043   if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
01044     if (BI->isUnconditional())
01045       if (SimplifyStoreAtEndOfBlock(SI))
01046         return nullptr;  // xform done!
01047 
01048   return nullptr;
01049 }
01050 
01051 /// SimplifyStoreAtEndOfBlock - Turn things like:
01052 ///   if () { *P = v1; } else { *P = v2 }
01053 /// into a phi node with a store in the successor.
01054 ///
01055 /// Simplify things like:
01056 ///   *P = v1; if () { *P = v2; }
01057 /// into a phi node with a store in the successor.
01058 ///
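// The result for the if/then/else form, sketched as IR (block names are
// hypothetical; "storemerge" matches the PHI created below):
//   merge:
//     %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
//     store i32 %storemerge, i32* %P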
01059 bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
01060   BasicBlock *StoreBB = SI.getParent();
01061 
01062   // Check to see if the successor block has exactly two incoming edges.  If
01063   // so, see if the other predecessor contains a store to the same location.
01064   // If so, insert a PHI node (if needed) and move the stores down.
01065   BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
01066 
01067   // Determine whether Dest has exactly two predecessors and, if so, compute
01068   // the other predecessor.
01069   pred_iterator PI = pred_begin(DestBB);
01070   BasicBlock *P = *PI;
01071   BasicBlock *OtherBB = nullptr;
01072 
01073   if (P != StoreBB)
01074     OtherBB = P;
01075 
01076   if (++PI == pred_end(DestBB))
01077     return false;
01078 
01079   P = *PI;
01080   if (P != StoreBB) {
01081     if (OtherBB)
01082       return false;
01083     OtherBB = P;
01084   }
01085   if (++PI != pred_end(DestBB))
01086     return false;
01087 
01088   // Bail out if the relevant blocks aren't all distinct (this can happen,
01089   // for example, if SI is in an infinite loop)
01090   if (StoreBB == DestBB || OtherBB == DestBB)
01091     return false;
01092 
01093   // Verify that the other block ends in a branch and is not otherwise empty.
01094   BasicBlock::iterator BBI = OtherBB->getTerminator();
01095   BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
01096   if (!OtherBr || BBI == OtherBB->begin())
01097     return false;
01098 
01099   // If the other block ends in an unconditional branch, check for the 'if then
01100   // else' case: there must be an instruction before the branch.
01101   StoreInst *OtherStore = nullptr;
01102   if (OtherBr->isUnconditional()) {
01103     --BBI;
01104     // Skip over debugging info.
01105     while (isa<DbgInfoIntrinsic>(BBI) ||
01106            (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
01107       if (BBI==OtherBB->begin())
01108         return false;
01109       --BBI;
01110     }
01111     // If this isn't a store, isn't a store to the same location, or is not the
01112     // right kind of store, bail out.
01113     OtherStore = dyn_cast<StoreInst>(BBI);
01114     if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
01115         !SI.isSameOperationAs(OtherStore))
01116       return false;
01117   } else {
01118     // Otherwise, the other block ended with a conditional branch. If one of the
01119     // destinations is StoreBB, then we have the if/then case.
01120     if (OtherBr->getSuccessor(0) != StoreBB &&
01121         OtherBr->getSuccessor(1) != StoreBB)
01122       return false;
01123 
01124     // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
01125     // if/then triangle.  See if there is a store to the same ptr as SI that
01126     // lives in OtherBB.
01127     for (;; --BBI) {
01128       // Check to see if we find the matching store.
01129       if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
01130         if (OtherStore->getOperand(1) != SI.getOperand(1) ||
01131             !SI.isSameOperationAs(OtherStore))
01132           return false;
01133         break;
01134       }
01135       // If we find something that may be using or overwriting the stored
01136       // value, or if we run out of instructions, we can't do the xform.
01137       if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
01138           BBI == OtherBB->begin())
01139         return false;
01140     }
01141 
01142     // In order to eliminate the store in OtherBr, we have to
01143     // make sure nothing reads or overwrites the stored value in
01144     // StoreBB.
01145     for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
01146       // FIXME: This should really be AA driven.
01147       if (I->mayReadFromMemory() || I->mayWriteToMemory())
01148         return false;
01149     }
01150   }
01151 
01152   // Insert a PHI node now if we need it.
01153   Value *MergedVal = OtherStore->getOperand(0);
01154   if (MergedVal != SI.getOperand(0)) {
01155     PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
01156     PN->addIncoming(SI.getOperand(0), SI.getParent());
01157     PN->addIncoming(OtherStore->getOperand(0), OtherBB);
01158     MergedVal = InsertNewInstBefore(PN, DestBB->front());
01159   }
01160 
01161   // Advance to a place where it is safe to insert the new store and
01162   // insert it.
01163   BBI = DestBB->getFirstInsertionPt();
01164   StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
01165                                    SI.isVolatile(),
01166                                    SI.getAlignment(),
01167                                    SI.getOrdering(),
01168                                    SI.getSynchScope());
01169   InsertNewInstBefore(NewSI, *BBI);
01170   NewSI->setDebugLoc(OtherStore->getDebugLoc());
01171 
01172   // If the two stores had AA tags, merge them.
01173   AAMDNodes AATags;
01174   SI.getAAMetadata(AATags);
01175   if (AATags) {
01176     OtherStore->getAAMetadata(AATags, /* Merge = */ true);
01177     NewSI->setAAMetadata(AATags);
01178   }
01179 
01180   // Nuke the old stores.
01181   EraseInstFromFunction(SI);
01182   EraseInstFromFunction(*OtherStore);
01183   return true;
01184 }