LLVM 10.0.0svn
InstCombineLoadStoreAlloca.cpp
1 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the visit functions for load, store and alloca.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "InstCombineInternal.h"
14 #include "llvm/ADT/MapVector.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/Loads.h"
19 #include "llvm/IR/ConstantRange.h"
20 #include "llvm/IR/DataLayout.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/LLVMContext.h"
24 #include "llvm/IR/MDBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
27 using namespace llvm;
28 using namespace PatternMatch;
29 
30 #define DEBUG_TYPE "instcombine"
31 
32 STATISTIC(NumDeadStore, "Number of dead stores eliminated");
33 STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
34 
35 /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
36 /// some part of a constant global variable. This intentionally only accepts
37 /// constant expressions because we can't rewrite arbitrary instructions.
38 static bool pointsToConstantGlobal(Value *V) {
39  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
40  return GV->isConstant();
41 
42  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
43  if (CE->getOpcode() == Instruction::BitCast ||
44  CE->getOpcode() == Instruction::AddrSpaceCast ||
45  CE->getOpcode() == Instruction::GetElementPtr)
46  return pointsToConstantGlobal(CE->getOperand(0));
47  }
48  return false;
49 }
50 
51 /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
52 /// pointer to an alloca. Ignore any reads of the pointer, return false if we
53 /// see any stores or other unknown uses. If we see pointer arithmetic, keep
54 /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
55 /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
56 /// the alloca, and if the source pointer is a pointer to a constant global, we
57 /// can optimize this.
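// (Illustrative sketch added for clarity; it is not part of the original
// source, and all value names are invented.) The IR shape this walk accepts
// looks roughly like:
//
//   @cst = private constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
//   %buf = alloca [4 x i32]
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %buf.i8,
//            i8* bitcast ([4 x i32]* @cst to i8*), i64 16, i1 false)
//   %v = load i32, i32* %buf.elt           ; only reads of %buf afterwards
//
// Since %buf is written exactly once, by a memcpy from a constant global,
// its loads can later be redirected to @cst.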
58 static bool
59 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
60  SmallVectorImpl<Instruction *> &ToDelete) {
61  // We track lifetime intrinsics as we encounter them. If we decide to go
62  // ahead and replace the value with the global, this lets the caller quickly
63  // eliminate the markers.
64 
65  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
66  ValuesToInspect.emplace_back(V, false);
67  while (!ValuesToInspect.empty()) {
68  auto ValuePair = ValuesToInspect.pop_back_val();
69  const bool IsOffset = ValuePair.second;
70  for (auto &U : ValuePair.first->uses()) {
71  auto *I = cast<Instruction>(U.getUser());
72 
73  if (auto *LI = dyn_cast<LoadInst>(I)) {
74  // Ignore non-volatile loads, they are always ok.
75  if (!LI->isSimple()) return false;
76  continue;
77  }
78 
79  if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
80  // If uses of the bitcast are ok, we are ok.
81  ValuesToInspect.emplace_back(I, IsOffset);
82  continue;
83  }
84  if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
85  // If the GEP has all zero indices, it doesn't offset the pointer. If it
86  // doesn't, it does.
87  ValuesToInspect.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
88  continue;
89  }
90 
91  if (auto *Call = dyn_cast<CallBase>(I)) {
92  // If this is the function being called then we treat it like a load and
93  // ignore it.
94  if (Call->isCallee(&U))
95  continue;
96 
97  unsigned DataOpNo = Call->getDataOperandNo(&U);
98  bool IsArgOperand = Call->isArgOperand(&U);
99 
100  // Inalloca arguments are clobbered by the call.
101  if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
102  return false;
103 
104  // If this is a readonly/readnone call site, then we know it is just a
105  // load (but one that potentially returns the value itself), so we can
106  // ignore it if we know that the value isn't captured.
107  if (Call->onlyReadsMemory() &&
108  (Call->use_empty() || Call->doesNotCapture(DataOpNo)))
109  continue;
110 
111  // If this is being passed as a byval argument, the caller is making a
112  // copy, so it is only a read of the alloca.
113  if (IsArgOperand && Call->isByValArgument(DataOpNo))
114  continue;
115  }
116 
117  // Lifetime intrinsics can be handled by the caller.
118  if (I->isLifetimeStartOrEnd()) {
119  assert(I->use_empty() && "Lifetime markers have no result to use!");
120  ToDelete.push_back(I);
121  continue;
122  }
123 
124  // If this isn't our memcpy/memmove, reject it as something we can't
125  // handle.
126  MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
127  if (!MI)
128  return false;
129 
130  // If the transfer is using the alloca as a source of the transfer, then
131  // ignore it since it is a load (unless the transfer is volatile).
132  if (U.getOperandNo() == 1) {
133  if (MI->isVolatile()) return false;
134  continue;
135  }
136 
137  // If we already have seen a copy, reject the second one.
138  if (TheCopy) return false;
139 
140  // If the pointer has been offset from the start of the alloca, we can't
141  // safely handle this.
142  if (IsOffset) return false;
143 
144  // If the memintrinsic isn't using the alloca as the dest, reject it.
145  if (U.getOperandNo() != 0) return false;
146 
147  // If the source of the memcpy/move is not a constant global, reject it.
148  if (!pointsToConstantGlobal(MI->getSource()))
149  return false;
150 
151  // Otherwise, the transform is safe. Remember the copy instruction.
152  TheCopy = MI;
153  }
154  }
155  return true;
156 }
157 
158 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
159 /// modified by a copy from a constant global. If we can prove this, we can
160 /// replace any uses of the alloca with uses of the global directly.
161 static MemTransferInst *
162 isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
163  SmallVectorImpl<Instruction *> &ToDelete) {
164  MemTransferInst *TheCopy = nullptr;
165  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
166  return TheCopy;
167  return nullptr;
168 }
169 
170 /// Returns true if V is dereferenceable for size of alloca.
171 static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
172  const DataLayout &DL) {
173  if (AI->isArrayAllocation())
174  return false;
175  uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
176  if (!AllocaSize)
177  return false;
178  return isDereferenceableAndAlignedPointer(V, Align(AI->getAlignment()),
179  APInt(64, AllocaSize), DL);
180 }
181 
182 static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
183  // Check for array size of 1 (scalar allocation).
184  if (!AI.isArrayAllocation()) {
185  // i32 1 is the canonical array size for scalar allocations.
186  if (AI.getArraySize()->getType()->isIntegerTy(32))
187  return nullptr;
188 
189  // Canonicalize it.
190  Value *V = IC.Builder.getInt32(1);
191  AI.setOperand(0, V);
192  return &AI;
193  }
194 
195  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
196  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
197  if (C->getValue().getActiveBits() <= 64) {
198  Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
199  AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
200  New->setAlignment(MaybeAlign(AI.getAlignment()));
201 
202  // Scan to the end of the allocation instructions, to skip over a block of
203  // allocas if possible...also skip interleaved debug info
204  //
205  BasicBlock::iterator It(New);
206  while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
207  ++It;
208 
209  // Now that I is pointing to the first non-allocation-inst in the block,
210  // insert our getelementptr instruction...
211  //
212  Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
213  Value *NullIdx = Constant::getNullValue(IdxTy);
214  Value *Idx[2] = {NullIdx, NullIdx};
215  Instruction *GEP = GetElementPtrInst::CreateInBounds(
216  NewTy, New, Idx, New->getName() + ".sub");
217  IC.InsertNewInstBefore(GEP, *It);
218 
219  // Now make everything use the getelementptr instead of the original
220  // allocation.
221  return IC.replaceInstUsesWith(AI, GEP);
222  }
223  }
224 
225  if (isa<UndefValue>(AI.getArraySize()))
226  return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
227 
228  // Ensure that the alloca array size argument has type intptr_t, so that
229  // any casting is exposed early.
230  Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
231  if (AI.getArraySize()->getType() != IntPtrTy) {
232  Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
233  AI.setOperand(0, V);
234  return &AI;
235  }
236 
237  return nullptr;
238 }
239 
240 namespace {
241 // If I and V are pointers in different address space, it is not allowed to
242 // use replaceAllUsesWith since I and V have different types. A
243 // non-target-specific transformation should not use addrspacecast on V since
244 // the two address space may be disjoint depending on target.
245 //
246 // This class chases down uses of the old pointer until reaching the load
247 // instructions, then replaces the old pointer in the load instructions with
248 // the new pointer. If during the chasing it sees bitcast or GEP, it will
249 // create new bitcast or GEP with the new pointer and use them in the load
250 // instruction.
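// Rough illustration (not from the original source; names invented): if %a
// below is proven to be a copy of a global living in another address space,
//
//   %a = alloca [4 x i32]
//   %p = getelementptr [4 x i32], [4 x i32]* %a, i64 0, i64 1
//   %v = load i32, i32* %p
//
// the GEP is re-created on top of the addrspace(N) global and only the final
// load is rewritten, rather than calling replaceAllUsesWith across address
// spaces.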
251 class PointerReplacer {
252 public:
253  PointerReplacer(InstCombiner &IC) : IC(IC) {}
254  void replacePointer(Instruction &I, Value *V);
255 
256 private:
257  void findLoadAndReplace(Instruction &I);
258  void replace(Instruction *I);
259  Value *getReplacement(Value *I);
260 
261  SmallVector<Instruction *, 4> Path;
262  MapVector<Value *, Value *> WorkMap;
263  InstCombiner &IC;
264 };
265 } // end anonymous namespace
266 
267 void PointerReplacer::findLoadAndReplace(Instruction &I) {
268  for (auto U : I.users()) {
269  auto *Inst = dyn_cast<Instruction>(&*U);
270  if (!Inst)
271  return;
272  LLVM_DEBUG(dbgs() << "Found pointer user: " << *U << '\n');
273  if (isa<LoadInst>(Inst)) {
274  for (auto P : Path)
275  replace(P);
276  replace(Inst);
277  } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
278  Path.push_back(Inst);
279  findLoadAndReplace(*Inst);
280  Path.pop_back();
281  } else {
282  return;
283  }
284  }
285 }
286 
287 Value *PointerReplacer::getReplacement(Value *V) {
288  auto Loc = WorkMap.find(V);
289  if (Loc != WorkMap.end())
290  return Loc->second;
291  return nullptr;
292 }
293 
294 void PointerReplacer::replace(Instruction *I) {
295  if (getReplacement(I))
296  return;
297 
298  if (auto *LT = dyn_cast<LoadInst>(I)) {
299  auto *V = getReplacement(LT->getPointerOperand());
300  assert(V && "Operand not replaced");
301  auto *NewI = new LoadInst(I->getType(), V);
302  NewI->takeName(LT);
303  IC.InsertNewInstWith(NewI, *LT);
304  IC.replaceInstUsesWith(*LT, NewI);
305  WorkMap[LT] = NewI;
306  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
307  auto *V = getReplacement(GEP->getPointerOperand());
308  assert(V && "Operand not replaced");
309  SmallVector<Value *, 8> Indices;
310  Indices.append(GEP->idx_begin(), GEP->idx_end());
311  auto *NewI = GetElementPtrInst::Create(
312  V->getType()->getPointerElementType(), V, Indices);
313  IC.InsertNewInstWith(NewI, *GEP);
314  NewI->takeName(GEP);
315  WorkMap[GEP] = NewI;
316  } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
317  auto *V = getReplacement(BC->getOperand(0));
318  assert(V && "Operand not replaced");
319  auto *NewT = PointerType::get(BC->getType()->getPointerElementType(),
320  V->getType()->getPointerAddressSpace());
321  auto *NewI = new BitCastInst(V, NewT);
322  IC.InsertNewInstWith(NewI, *BC);
323  NewI->takeName(BC);
324  WorkMap[BC] = NewI;
325  } else {
326  llvm_unreachable("should never reach here");
327  }
328 }
329 
330 void PointerReplacer::replacePointer(Instruction &I, Value *V) {
331 #ifndef NDEBUG
332  auto *PT = cast<PointerType>(I.getType());
333  auto *NT = cast<PointerType>(V->getType());
334  assert(PT != NT && PT->getElementType() == NT->getElementType() &&
335  "Invalid usage");
336 #endif
337  WorkMap[&I] = V;
338  findLoadAndReplace(I);
339 }
340 
341 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
342  if (auto *I = simplifyAllocaArraySize(*this, AI))
343  return I;
344 
345  if (AI.getAllocatedType()->isSized()) {
346  // If the alignment is 0 (unspecified), assign it the preferred alignment.
347  if (AI.getAlignment() == 0)
348  AI.setAlignment(
349  MaybeAlign(DL.getPrefTypeAlignment(AI.getAllocatedType())));
350 
351  // Move all alloca's of zero byte objects to the entry block and merge them
352  // together. Note that we only do this for alloca's, because malloc should
353  // allocate and return a unique pointer, even for a zero byte allocation.
354  if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
355  // For a zero sized alloca there is no point in doing an array allocation.
356  // This is helpful if the array size is a complicated expression not used
357  // elsewhere.
358  if (AI.isArrayAllocation()) {
359  AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
360  return &AI;
361  }
362 
363  // Get the first instruction in the entry block.
364  BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
365  Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
366  if (FirstInst != &AI) {
367  // If the entry block doesn't start with a zero-size alloca then move
368  // this one to the start of the entry block. There is no problem with
369  // dominance as the array size was forced to a constant earlier already.
370  AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
371  if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
372  DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
373  AI.moveBefore(FirstInst);
374  return &AI;
375  }
376 
377  // If the alignment of the entry block alloca is 0 (unspecified),
378  // assign it the preferred alignment.
379  if (EntryAI->getAlignment() == 0)
380  EntryAI->setAlignment(
381  MaybeAlign(DL.getPrefTypeAlignment(EntryAI->getAllocatedType())));
382  // Replace this zero-sized alloca with the one at the start of the entry
383  // block after ensuring that the address will be aligned enough for both
384  // types.
385  const MaybeAlign MaxAlign(
386  std::max(EntryAI->getAlignment(), AI.getAlignment()));
387  EntryAI->setAlignment(MaxAlign);
388  if (AI.getType() != EntryAI->getType())
389  return new BitCastInst(EntryAI, AI.getType());
390  return replaceInstUsesWith(AI, EntryAI);
391  }
392  }
393  }
394 
395  if (AI.getAlignment()) {
396  // Check to see if this allocation is only modified by a memcpy/memmove from
397  // a constant global whose alignment is equal to or exceeds that of the
398  // allocation. If this is the case, we can change all users to use
399  // the constant global instead. This is commonly produced by the CFE by
400  // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
401  // is only subsequently read.
402  SmallVector<Instruction *, 4> ToDelete;
403  if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
404  unsigned SourceAlign = getOrEnforceKnownAlignment(
405  Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
406  if (AI.getAlignment() <= SourceAlign &&
407  isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
408  LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
409  LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
410  for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
411  eraseInstFromFunction(*ToDelete[i]);
412  Constant *TheSrc = cast<Constant>(Copy->getSource());
413  auto *SrcTy = TheSrc->getType();
414  auto *DestTy = PointerType::get(AI.getType()->getPointerElementType(),
415  SrcTy->getPointerAddressSpace());
416  Constant *Cast =
417  ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, DestTy);
418  if (AI.getType()->getPointerAddressSpace() ==
419  SrcTy->getPointerAddressSpace()) {
420  Instruction *NewI = replaceInstUsesWith(AI, Cast);
421  eraseInstFromFunction(*Copy);
422  ++NumGlobalCopies;
423  return NewI;
424  } else {
425  PointerReplacer PtrReplacer(*this);
426  PtrReplacer.replacePointer(AI, Cast);
427  ++NumGlobalCopies;
428  }
429  }
430  }
431  }
432 
433  // At last, use the generic allocation site handler to aggressively remove
434  // unused allocas.
435  return visitAllocSite(AI);
436 }
437 
438 // Are we allowed to form an atomic load or store of this type?
439 static bool isSupportedAtomicType(Type *Ty) {
440  return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
441 }
442 
443 /// Helper to combine a load to a new type.
444 ///
445 /// This just does the work of combining a load to a new type. It handles
446 /// metadata, etc., and returns the new instruction. The \c NewTy should be the
447 /// loaded *value* type. This will convert it to a pointer, cast the operand to
448 /// that pointer type, load it, etc.
449 ///
450 /// Note that this will create all of the instructions with whatever insert
451 /// point the \c InstCombiner currently is using.
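// For illustration only (not part of the original source; names invented),
// the rewrite performed here when NewTy is float:
//
//   %v = load i32, i32* %p
// becomes
//   %p.cast = bitcast i32* %p to float*
//   %v.cast = load float, float* %p.cast
//
// with the original alignment, atomic ordering, and load metadata carried
// over to the new load.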
452 static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
453  const Twine &Suffix = "") {
454  assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
455  "can't fold an atomic load to requested type");
456 
457  Value *Ptr = LI.getPointerOperand();
458  unsigned AS = LI.getPointerAddressSpace();
459  Value *NewPtr = nullptr;
460  if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
461  NewPtr->getType()->getPointerElementType() == NewTy &&
462  NewPtr->getType()->getPointerAddressSpace() == AS))
463  NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
464 
465  LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
466  NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
467  NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
468  copyMetadataForLoad(*NewLoad, LI);
469  return NewLoad;
470 }
471 
472 /// Combine a store to a new type.
473 ///
474 /// Returns the newly created store instruction.
475 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
476  assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
477  "can't fold an atomic store of requested type");
478 
479  Value *Ptr = SI.getPointerOperand();
480  unsigned AS = SI.getPointerAddressSpace();
481  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
482  SI.getAllMetadata(MD);
483 
484  StoreInst *NewStore = IC.Builder.CreateAlignedStore(
485  V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
486  SI.getAlignment(), SI.isVolatile());
487  NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
488  for (const auto &MDPair : MD) {
489  unsigned ID = MDPair.first;
490  MDNode *N = MDPair.second;
491  // Note, essentially every kind of metadata should be preserved here! This
492  // routine is supposed to clone a store instruction changing *only its
493  // type*. The only metadata it makes sense to drop is metadata which is
494  // invalidated when the pointer type changes. This should essentially
495  // never be the case in LLVM, but we explicitly switch over only known
496  // metadata to be conservatively correct. If you are adding metadata to
497  // LLVM which pertains to stores, you almost certainly want to add it
498  // here.
499  switch (ID) {
500  case LLVMContext::MD_dbg:
501  case LLVMContext::MD_tbaa:
502  case LLVMContext::MD_prof:
503  case LLVMContext::MD_fpmath:
504  case LLVMContext::MD_tbaa_struct:
505  case LLVMContext::MD_alias_scope:
506  case LLVMContext::MD_noalias:
507  case LLVMContext::MD_nontemporal:
508  case LLVMContext::MD_mem_parallel_loop_access:
509  case LLVMContext::MD_access_group:
510  // All of these directly apply.
511  NewStore->setMetadata(ID, N);
512  break;
513  case LLVMContext::MD_invariant_load:
514  case LLVMContext::MD_nonnull:
515  case LLVMContext::MD_range:
516  case LLVMContext::MD_align:
517  case LLVMContext::MD_dereferenceable:
518  case LLVMContext::MD_dereferenceable_or_null:
519  // These don't apply for stores.
520  break;
521  }
522  }
523 
524  return NewStore;
525 }
526 
527 /// Returns true if the instruction represents a minmax pattern like:
528 /// select ((cmp load V1, load V2), V1, V2).
529 static bool isMinMaxWithLoads(Value *V) {
530  assert(V->getType()->isPointerTy() && "Expected pointer type.");
531  // Ignore possible ty* to ixx* bitcast.
532  V = peekThroughBitcast(V);
533  // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
534  // pattern.
535  CmpInst::Predicate Pred;
536  Instruction *L1;
537  Instruction *L2;
538  Value *LHS;
539  Value *RHS;
540  if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
541  m_Value(LHS), m_Value(RHS))))
542  return false;
543  return (match(L1, m_Load(m_Specific(LHS))) &&
544  match(L2, m_Load(m_Specific(RHS)))) ||
545  (match(L1, m_Load(m_Specific(RHS))) &&
546  match(L2, m_Load(m_Specific(LHS))));
547 }
548 
549 /// Combine loads to match the type of their uses' value after looking
550 /// through intervening bitcasts.
551 ///
552 /// The core idea here is that if the result of a load is used in an operation,
553 /// we should load the type most conducive to that operation. For example, when
554 /// loading an integer and converting that immediately to a pointer, we should
555 /// instead directly load a pointer.
556 ///
557 /// However, this routine must never change the width of a load or the number of
558 /// loads as that would introduce a semantic change. This combine is expected to
559 /// be a semantic no-op which just allows loads to more closely model the types
560 /// of their consuming operations.
561 ///
562 /// Currently, we also refuse to change the precise type used for an atomic load
563 /// or a volatile load. This is debatable, and might be reasonable to change
564 /// later. However, it is risky in case some backend or other part of LLVM is
565 /// relying on the exact type loaded to select appropriate atomic operations.
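// Hypothetical example of the canonicalization described above (invented
// names, and assuming 64-bit pointers so the load width is unchanged):
//
//   %i = load i64, i64* %p
//   %q = inttoptr i64 %i to i8*
// is better modeled as
//   %p.cast = bitcast i64* %p to i8**
//   %q      = load i8*, i8** %p.cast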
566 static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
567  // FIXME: We could probably with some care handle both volatile and ordered
568  // atomic loads here but it isn't clear that this is important.
569  if (!LI.isUnordered())
570  return nullptr;
571 
572  if (LI.use_empty())
573  return nullptr;
574 
575  // swifterror values can't be bitcasted.
576  if (LI.getPointerOperand()->isSwiftError())
577  return nullptr;
578 
579  Type *Ty = LI.getType();
580  const DataLayout &DL = IC.getDataLayout();
581 
582  // Try to canonicalize loads which are only ever stored to operate over
583  // integers instead of any other type. We only do this when the loaded type
584  // is sized and has a size exactly the same as its store size and the store
585  // size is a legal integer type.
586  // Do not perform canonicalization if minmax pattern is found (to avoid
587  // infinite loop).
588  if (!Ty->isIntegerTy() && Ty->isSized() &&
589  DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
590  DL.typeSizeEqualsStoreSize(Ty) &&
591  !DL.isNonIntegralPointerType(Ty) &&
592  !isMinMaxWithLoads(
593  peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
594  if (all_of(LI.users(), [&LI](User *U) {
595  auto *SI = dyn_cast<StoreInst>(U);
596  return SI && SI->getPointerOperand() != &LI &&
597  !SI->getPointerOperand()->isSwiftError();
598  })) {
599  LoadInst *NewLoad = combineLoadToNewType(
600  IC, LI,
601  Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
602  // Replace all the stores with stores of the newly loaded value.
603  for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
604  auto *SI = cast<StoreInst>(*UI++);
605  IC.Builder.SetInsertPoint(SI);
606  combineStoreToNewValue(IC, *SI, NewLoad);
607  IC.eraseInstFromFunction(*SI);
608  }
609  assert(LI.use_empty() && "Failed to remove all users of the load!");
610  // Return the old load so the combiner can delete it safely.
611  return &LI;
612  }
613  }
614 
615  // Fold away bit casts of the loaded value by loading the desired type.
616  // We can do this for BitCastInsts as well as casts from and to pointer types,
617  // as long as those are noops (i.e., the source or dest type have the same
618  // bitwidth as the target's pointers).
619  if (LI.hasOneUse())
620  if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
621  if (CI->isNoopCast(DL))
622  if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
623  LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
624  CI->replaceAllUsesWith(NewLoad);
625  IC.eraseInstFromFunction(*CI);
626  return &LI;
627  }
628 
629  // FIXME: We should also canonicalize loads of vectors when their elements are
630  // cast to other types.
631  return nullptr;
632 }
633 
634 static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
635  // FIXME: We could probably with some care handle both volatile and atomic
636  // stores here but it isn't clear that this is important.
637  if (!LI.isSimple())
638  return nullptr;
639 
640  Type *T = LI.getType();
641  if (!T->isAggregateType())
642  return nullptr;
643 
644  StringRef Name = LI.getName();
645  assert(LI.getAlignment() && "Alignment must be set at this point");
646 
647  if (auto *ST = dyn_cast<StructType>(T)) {
648  // If the struct only has one element, we unpack.
649  auto NumElements = ST->getNumElements();
650  if (NumElements == 1) {
651  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
652  ".unpack");
653  AAMDNodes AAMD;
654  LI.getAAMetadata(AAMD);
655  NewLoad->setAAMetadata(AAMD);
656  return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
657  UndefValue::get(T), NewLoad, 0, Name));
658  }
659 
660  // We don't want to break loads with padding here as we'd lose
661  // the knowledge that padding exists for the rest of the pipeline.
662  const DataLayout &DL = IC.getDataLayout();
663  auto *SL = DL.getStructLayout(ST);
664  if (SL->hasPadding())
665  return nullptr;
666 
667  auto Align = LI.getAlignment();
668  if (!Align)
669  Align = DL.getABITypeAlignment(ST);
670 
671  auto *Addr = LI.getPointerOperand();
672  auto *IdxType = Type::getInt32Ty(T->getContext());
673  auto *Zero = ConstantInt::get(IdxType, 0);
674 
675  Value *V = UndefValue::get(T);
676  for (unsigned i = 0; i < NumElements; i++) {
677  Value *Indices[2] = {
678  Zero,
679  ConstantInt::get(IdxType, i),
680  };
681  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
682  Name + ".elt");
683  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
684  auto *L = IC.Builder.CreateAlignedLoad(ST->getElementType(i), Ptr,
685  EltAlign, Name + ".unpack");
686  // Propagate AA metadata. It'll still be valid on the narrowed load.
687  AAMDNodes AAMD;
688  LI.getAAMetadata(AAMD);
689  L->setAAMetadata(AAMD);
690  V = IC.Builder.CreateInsertValue(V, L, i);
691  }
692 
693  V->setName(Name);
694  return IC.replaceInstUsesWith(LI, V);
695  }
696 
697  if (auto *AT = dyn_cast<ArrayType>(T)) {
698  auto *ET = AT->getElementType();
699  auto NumElements = AT->getNumElements();
700  if (NumElements == 1) {
701  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
702  AAMDNodes AAMD;
703  LI.getAAMetadata(AAMD);
704  NewLoad->setAAMetadata(AAMD);
705  return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
706  UndefValue::get(T), NewLoad, 0, Name));
707  }
708 
709  // Bail out if the array is too large. Ideally we would like to optimize
710  // arrays of arbitrary size but this has a terrible impact on compile time.
711  // The threshold here is chosen arbitrarily, maybe needs a little bit of
712  // tuning.
713  if (NumElements > IC.MaxArraySizeForCombine)
714  return nullptr;
715 
716  const DataLayout &DL = IC.getDataLayout();
717  auto EltSize = DL.getTypeAllocSize(ET);
718  auto Align = LI.getAlignment();
719  if (!Align)
720  Align = DL.getABITypeAlignment(T);
721 
722  auto *Addr = LI.getPointerOperand();
723  auto *IdxType = Type::getInt64Ty(T->getContext());
724  auto *Zero = ConstantInt::get(IdxType, 0);
725 
726  Value *V = UndefValue::get(T);
727  uint64_t Offset = 0;
728  for (uint64_t i = 0; i < NumElements; i++) {
729  Value *Indices[2] = {
730  Zero,
731  ConstantInt::get(IdxType, i),
732  };
733  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
734  Name + ".elt");
735  auto *L = IC.Builder.CreateAlignedLoad(
736  AT->getElementType(), Ptr, MinAlign(Align, Offset), Name + ".unpack");
737  AAMDNodes AAMD;
738  LI.getAAMetadata(AAMD);
739  L->setAAMetadata(AAMD);
740  V = IC.Builder.CreateInsertValue(V, L, i);
741  Offset += EltSize;
742  }
743 
744  V->setName(Name);
745  return IC.replaceInstUsesWith(LI, V);
746  }
747 
748  return nullptr;
749 }
750 
751 // If we can determine that all possible objects pointed to by the provided
752 // pointer value are, not only dereferenceable, but also definitively less than
753 // or equal to the provided maximum size, then return true. Otherwise, return
754 // false (only constant globals and allocas can currently be proven to satisfy the bound).
755 //
756 // FIXME: This should probably live in ValueTracking (or similar).
757 static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
758  const DataLayout &DL) {
759  SmallPtrSet<Value *, 4> Visited;
760  SmallVector<Value *, 4> Worklist(1, V);
761 
762  do {
763  Value *P = Worklist.pop_back_val();
764  P = P->stripPointerCasts();
765 
766  if (!Visited.insert(P).second)
767  continue;
768 
769  if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
770  Worklist.push_back(SI->getTrueValue());
771  Worklist.push_back(SI->getFalseValue());
772  continue;
773  }
774 
775  if (PHINode *PN = dyn_cast<PHINode>(P)) {
776  for (Value *IncValue : PN->incoming_values())
777  Worklist.push_back(IncValue);
778  continue;
779  }
780 
781  if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
782  if (GA->isInterposable())
783  return false;
784  Worklist.push_back(GA->getAliasee());
785  continue;
786  }
787 
788  // If we know how big this object is, and it is less than MaxSize, continue
789  // searching. Otherwise, return false.
790  if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
791  if (!AI->getAllocatedType()->isSized())
792  return false;
793 
794  ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
795  if (!CS)
796  return false;
797 
798  uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
799  // Make sure that, even if the multiplication below would wrap as an
800  // uint64_t, we still do the right thing.
801  if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
802  return false;
803  continue;
804  }
805 
806  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
807  if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
808  return false;
809 
810  uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
811  if (InitSize > MaxSize)
812  return false;
813  continue;
814  }
815 
816  return false;
817  } while (!Worklist.empty());
818 
819  return true;
820 }
821 
822 // If we're indexing into an object of a known size, and the outer index is
823 // not a constant, but having any value but zero would lead to undefined
824 // behavior, replace it with zero.
825 //
826 // For example, if we have:
827 // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
828 // ...
829 // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
830 // ... = load i32* %arrayidx, align 4
831 // Then we know that we can replace %x in the GEP with i64 0.
832 //
833 // FIXME: We could fold any GEP index to zero that would cause UB if it were
834 // not zero. Currently, we only handle the first such index. Also, we could
835 // also search through non-zero constant indices if we kept track of the
836 // offsets those indices implied.
837 static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
838  Instruction *MemI, unsigned &Idx) {
839  if (GEPI->getNumOperands() < 2)
840  return false;
841 
842  // Find the first non-zero index of a GEP. If all indices are zero, return
843  // one past the last index.
844  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
845  unsigned I = 1;
846  for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
847  Value *V = GEPI->getOperand(I);
848  if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
849  if (CI->isZero())
850  continue;
851 
852  break;
853  }
854 
855  return I;
856  };
857 
858  // Skip through initial 'zero' indices, and find the corresponding pointer
859  // type. See if the next index is not a constant.
860  Idx = FirstNZIdx(GEPI);
861  if (Idx == GEPI->getNumOperands())
862  return false;
863  if (isa<Constant>(GEPI->getOperand(Idx)))
864  return false;
865 
866  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
867  Type *AllocTy =
868  GetElementPtrInst::getIndexedType(GEPI->getSourceElementType(), Ops);
869  if (!AllocTy || !AllocTy->isSized())
870  return false;
871  const DataLayout &DL = IC.getDataLayout();
872  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
873 
874  // If there are more indices after the one we might replace with a zero, make
875  // sure they're all non-negative. If any of them are negative, the overall
876  // address being computed might be before the base address determined by the
877  // first non-zero index.
878  auto IsAllNonNegative = [&]() {
879  for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
880  KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
881  if (Known.isNonNegative())
882  continue;
883  return false;
884  }
885 
886  return true;
887  };
888 
889  // FIXME: If the GEP is not inbounds, and there are extra indices after the
890  // one we'll replace, those could cause the address computation to wrap
891  // (rendering the IsAllNonNegative() check below insufficient). We can do
892  // better, ignoring zero indices (and other indices we can prove small
893  // enough not to wrap).
894  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
895  return false;
896 
897  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
898  // also known to be dereferenceable.
899  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
900  IsAllNonNegative();
901 }
902 
903 // If we're indexing into an object with a variable index for the memory
904 // access, but the object has only one element, we can assume that the index
905 // will always be zero. If we replace the GEP, return it.
906 template <typename T>
907 static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
908  T &MemI) {
909  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
910  unsigned Idx;
911  if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
912  Instruction *NewGEPI = GEPI->clone();
913  NewGEPI->setOperand(Idx,
914  ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
915  NewGEPI->insertBefore(GEPI);
916  MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
917  return NewGEPI;
918  }
919  }
920 
921  return nullptr;
922 }
923 
924 static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
925  if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
926  return false;
927 
928  auto *Ptr = SI.getPointerOperand();
929  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
930  Ptr = GEPI->getOperand(0);
931  return (isa<ConstantPointerNull>(Ptr) &&
932  !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
933 }
934 
935 static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
936  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
937  const Value *GEPI0 = GEPI->getOperand(0);
938  if (isa<ConstantPointerNull>(GEPI0) &&
939  !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
940  return true;
941  }
942  if (isa<UndefValue>(Op) ||
943  (isa<ConstantPointerNull>(Op) &&
944  !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
945  return true;
946  return false;
947 }
948 
949 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
950  Value *Op = LI.getOperand(0);
951 
952  // Try to canonicalize the loaded type.
953  if (Instruction *Res = combineLoadToOperationType(*this, LI))
954  return Res;
955 
956  // Attempt to improve the alignment.
957  unsigned KnownAlign = getOrEnforceKnownAlignment(
958  Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT);
959  unsigned LoadAlign = LI.getAlignment();
960  unsigned EffectiveLoadAlign =
961  LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
962 
963  if (KnownAlign > EffectiveLoadAlign)
964  LI.setAlignment(MaybeAlign(KnownAlign));
965  else if (LoadAlign == 0)
966  LI.setAlignment(MaybeAlign(EffectiveLoadAlign));
967 
968  // Replace GEP indices if possible.
969  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
970  Worklist.Add(NewGEPI);
971  return &LI;
972  }
973 
974  if (Instruction *Res = unpackLoadToAggregate(*this, LI))
975  return Res;
976 
977  // Do really simple store-to-load forwarding and load CSE, to catch cases
978  // where there are several consecutive memory accesses to the same location,
979  // separated by a few arithmetic operations.
980  BasicBlock::iterator BBI(LI);
981  bool IsLoadCSE = false;
982  if (Value *AvailableVal = FindAvailableLoadedValue(
983  &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
984  if (IsLoadCSE)
985  combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
986 
987  return replaceInstUsesWith(
988  LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
989  LI.getName() + ".cast"));
990  }
991 
992  // None of the following transforms are legal for volatile/ordered atomic
993  // loads. Most of them do apply for unordered atomics.
994  if (!LI.isUnordered()) return nullptr;
995 
996  // load(gep null, ...) -> unreachable
997  // load null/undef -> unreachable
998  // TODO: Consider a target hook for valid address spaces for these transforms.
999  if (canSimplifyNullLoadOrGEP(LI, Op)) {
1000  // Insert a new store to null instruction before the load to indicate
1001  // that this code is not reachable. We do this instead of inserting
1002  // an unreachable instruction directly because we cannot modify the
1003  // CFG.
1004  StoreInst *SI = new StoreInst(UndefValue::get(LI.getType()),
1005  Constant::getNullValue(Op->getType()), &LI);
1006  SI->setDebugLoc(LI.getDebugLoc());
1007  return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
1008  }
1009 
1010  if (Op->hasOneUse()) {
1011  // Change select and PHI nodes to select values instead of addresses: this
1012  // helps alias analysis out a lot, allows many other simplifications, and
1013  // exposes redundancy in the code.
1014  //
1015  // Note that we cannot do the transformation unless we know that the
1016  // introduced loads cannot trap! Something like this is valid as long as
1017  // the condition is always false: load (select bool %C, int* null, int* %G),
1018  // but it would not be valid if we transformed it to load from null
1019  // unconditionally.
1020  //
1021  if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1022  // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1023  const MaybeAlign Alignment(LI.getAlignment());
1024  if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(),
1025  Alignment, DL, SI) &&
1026  isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(),
1027  Alignment, DL, SI)) {
1028  LoadInst *V1 =
1029  Builder.CreateLoad(LI.getType(), SI->getOperand(1),
1030  SI->getOperand(1)->getName() + ".val");
1031  LoadInst *V2 =
1032  Builder.CreateLoad(LI.getType(), SI->getOperand(2),
1033  SI->getOperand(2)->getName() + ".val");
1034  assert(LI.isUnordered() && "implied by above");
1035  V1->setAlignment(Alignment);
1036  V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1037  V2->setAlignment(Alignment);
1038  V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1039  return SelectInst::Create(SI->getCondition(), V1, V2);
1040  }
1041 
1042  // load (select (cond, null, P)) -> load P
1043  if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1044  !NullPointerIsDefined(SI->getFunction(),
1045  LI.getPointerAddressSpace())) {
1046  LI.setOperand(0, SI->getOperand(2));
1047  return &LI;
1048  }
1049 
1050  // load (select (cond, P, null)) -> load P
1051  if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1052  !NullPointerIsDefined(SI->getFunction(),
1053  LI.getPointerAddressSpace())) {
1054  LI.setOperand(0, SI->getOperand(1));
1055  return &LI;
1056  }
1057  }
1058  }
1059  return nullptr;
1060 }
1061 
1062 /// Look for extractelement/insertvalue sequence that acts like a bitcast.
1063 ///
1064 /// \returns underlying value that was "cast", or nullptr otherwise.
1065 ///
1066 /// For example, if we have:
1067 ///
1068 /// %E0 = extractelement <2 x double> %U, i32 0
1069 /// %V0 = insertvalue [2 x double] undef, double %E0, 0
1070 /// %E1 = extractelement <2 x double> %U, i32 1
1071 /// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1072 ///
1073 /// and the layout of a <2 x double> is isomorphic to a [2 x double],
1074 /// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1075 /// Note that %U may contain non-undef values where %V1 has undef.
1076 static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
1077  Value *U = nullptr;
1078  while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1079  auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1080  if (!E)
1081  return nullptr;
1082  auto *W = E->getVectorOperand();
1083  if (!U)
1084  U = W;
1085  else if (U != W)
1086  return nullptr;
1087  auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1088  if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1089  return nullptr;
1090  V = IV->getAggregateOperand();
1091  }
1092  if (!isa<UndefValue>(V) ||!U)
1093  return nullptr;
1094 
1095  auto *UT = cast<VectorType>(U->getType());
1096  auto *VT = V->getType();
1097  // Check that types UT and VT are bitwise isomorphic.
1098  const auto &DL = IC.getDataLayout();
1099  if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1100  return nullptr;
1101  }
1102  if (auto *AT = dyn_cast<ArrayType>(VT)) {
1103  if (AT->getNumElements() != UT->getNumElements())
1104  return nullptr;
1105  } else {
1106  auto *ST = cast<StructType>(VT);
1107  if (ST->getNumElements() != UT->getNumElements())
1108  return nullptr;
1109  for (const auto *EltT : ST->elements()) {
1110  if (EltT != UT->getElementType())
1111  return nullptr;
1112  }
1113  }
1114  return U;
1115 }
1116 
1117 /// Combine stores to match the type of value being stored.
1118 ///
1119 /// The core idea here is that the memory does not have any intrinsic type and
1120 /// where we can we should match the type of a store to the type of value being
1121 /// stored.
1122 ///
1123 /// However, this routine must never change the width of a store or the number of
1124 /// stores as that would introduce a semantic change. This combine is expected to
1125 /// be a semantic no-op which just allows stores to more closely model the types
1126 /// of their incoming values.
1127 ///
1128 /// Currently, we also refuse to change the precise type used for an atomic or
1129 /// volatile store. This is debatable, and might be reasonable to change later.
1130 /// However, it is risky in case some backend or other part of LLVM is relying
1131 /// on the exact type stored to select appropriate atomic operations.
1132 ///
1133 /// \returns true if the store was successfully combined away. This indicates
1134 /// the caller must erase the store instruction. We have to let the caller erase
1135 /// the store instruction as otherwise there is no way to signal whether it was
1136 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
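// A minimal sketch of the store-side canonicalization (illustrative only,
// value names invented):
//
//   %b = bitcast float %f to i32
//   store i32 %b, i32* %p
// becomes
//   %p.cast = bitcast i32* %p to float*
//   store float %f, float* %p.cast
//
// The width of the store is unchanged; only the type used to describe it is.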
1137 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
1138  // FIXME: We could probably with some care handle both volatile and ordered
1139  // atomic stores here but it isn't clear that this is important.
1140  if (!SI.isUnordered())
1141  return false;
1142 
1143  // swifterror values can't be bitcasted.
1144  if (SI.getPointerOperand()->isSwiftError())
1145  return false;
1146 
1147  Value *V = SI.getValueOperand();
1148 
1149  // Fold away bit casts of the stored value by storing the original type.
1150  if (auto *BC = dyn_cast<BitCastInst>(V)) {
1151  V = BC->getOperand(0);
1152  if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1153  combineStoreToNewValue(IC, SI, V);
1154  return true;
1155  }
1156  }
1157 
1158  if (Value *U = likeBitCastFromVector(IC, V))
1159  if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1160  combineStoreToNewValue(IC, SI, U);
1161  return true;
1162  }
1163 
1164  // FIXME: We should also canonicalize stores of vectors when their elements
1165  // are cast to other types.
1166  return false;
1167 }
1168 
1169 static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
1170  // FIXME: We could probably with some care handle both volatile and atomic
1171  // stores here but it isn't clear that this is important.
1172  if (!SI.isSimple())
1173  return false;
1174 
1175  Value *V = SI.getValueOperand();
1176  Type *T = V->getType();
1177 
1178  if (!T->isAggregateType())
1179  return false;
1180 
1181  if (auto *ST = dyn_cast<StructType>(T)) {
1182  // If the struct only has one element, we unpack.
1183  unsigned Count = ST->getNumElements();
1184  if (Count == 1) {
1185  V = IC.Builder.CreateExtractValue(V, 0);
1186  combineStoreToNewValue(IC, SI, V);
1187  return true;
1188  }
1189 
1190  // We don't want to break stores with padding here as we'd lose
1191  // the knowledge that padding exists for the rest of the pipeline.
1192  const DataLayout &DL = IC.getDataLayout();
1193  auto *SL = DL.getStructLayout(ST);
1194  if (SL->hasPadding())
1195  return false;
1196 
1197  auto Align = SI.getAlignment();
1198  if (!Align)
1199  Align = DL.getABITypeAlignment(ST);
1200 
1201  SmallString<16> EltName = V->getName();
1202  EltName += ".elt";
1203  auto *Addr = SI.getPointerOperand();
1204  SmallString<16> AddrName = Addr->getName();
1205  AddrName += ".repack";
1206 
1207  auto *IdxType = Type::getInt32Ty(ST->getContext());
1208  auto *Zero = ConstantInt::get(IdxType, 0);
1209  for (unsigned i = 0; i < Count; i++) {
1210  Value *Indices[2] = {
1211  Zero,
1212  ConstantInt::get(IdxType, i),
1213  };
1214  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
1215  AddrName);
1216  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1217  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
1218  llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1219  AAMDNodes AAMD;
1220  SI.getAAMetadata(AAMD);
1221  NS->setAAMetadata(AAMD);
1222  }
1223 
1224  return true;
1225  }
1226 
1227  if (auto *AT = dyn_cast<ArrayType>(T)) {
1228  // If the array only has one element, we unpack.
1229  auto NumElements = AT->getNumElements();
1230  if (NumElements == 1) {
1231  V = IC.Builder.CreateExtractValue(V, 0);
1232  combineStoreToNewValue(IC, SI, V);
1233  return true;
1234  }
1235 
1236  // Bail out if the array is too large. Ideally we would like to optimize
1237  // arrays of arbitrary size but this has a terrible impact on compile time.
1238  // The threshold here is chosen arbitrarily, maybe needs a little bit of
1239  // tuning.
1240  if (NumElements > IC.MaxArraySizeForCombine)
1241  return false;
1242 
1243  const DataLayout &DL = IC.getDataLayout();
1244  auto EltSize = DL.getTypeAllocSize(AT->getElementType());
1245  auto Align = SI.getAlignment();
1246  if (!Align)
1247  Align = DL.getABITypeAlignment(T);
1248 
1249  SmallString<16> EltName = V->getName();
1250  EltName += ".elt";
1251  auto *Addr = SI.getPointerOperand();
1252  SmallString<16> AddrName = Addr->getName();
1253  AddrName += ".repack";
1254 
1255  auto *IdxType = Type::getInt64Ty(T->getContext());
1256  auto *Zero = ConstantInt::get(IdxType, 0);
1257 
1258  uint64_t Offset = 0;
1259  for (uint64_t i = 0; i < NumElements; i++) {
1260  Value *Indices[2] = {
1261  Zero,
1262  ConstantInt::get(IdxType, i),
1263  };
1264  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
1265  AddrName);
1266  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1267  auto EltAlign = MinAlign(Align, Offset);
1268  Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1269  AAMDNodes AAMD;
1270  SI.getAAMetadata(AAMD);
1271  NS->setAAMetadata(AAMD);
1272  Offset += EltSize;
1273  }
1274 
1275  return true;
1276  }
1277 
1278  return false;
1279 }
1280 
1281 /// equivalentAddressValues - Test if A and B will obviously have the same
1282 /// value. This includes recognizing that %t0 and %t1 will have the same
1283 /// value in code like this:
1284 /// %t0 = getelementptr \@a, 0, 3
1285 /// store i32 0, i32* %t0
1286 /// %t1 = getelementptr \@a, 0, 3
1287 /// %t2 = load i32* %t1
1288 ///
1289 static bool equivalentAddressValues(Value *A, Value *B) {
1290  // Test if the values are trivially equivalent.
1291  if (A == B) return true;
1292 
1293  // Test if the values come from identical arithmetic instructions.
1294  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1295  // it's only used to compare two uses within the same basic block, which
1296  // means that they'll always either have the same value or one of them
1297  // will have an undefined value.
1298  if (isa<BinaryOperator>(A) ||
1299  isa<CastInst>(A) ||
1300  isa<PHINode>(A) ||
1301  isa<GetElementPtrInst>(A))
1302  if (Instruction *BI = dyn_cast<Instruction>(B))
1303  if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1304  return true;
1305 
1306  // Otherwise they may not be equivalent.
1307  return false;
1308 }
1309 
1310 /// Converts store (bitcast (load (bitcast (select ...)))) to
1311 /// store (load (select ...)), where select is minmax:
1312 /// select ((cmp load V1, load V2), V1, V2).
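// Illustrative sketch of the pattern (not from the original source; names
// invented). A float min/max selected through pointers but loaded and stored
// as i32,
//
//   %l1  = load float, float* %a
//   %l2  = load float, float* %b
//   %cmp = fcmp olt float %l1, %l2
//   %sel = select i1 %cmp, float* %a, float* %b
//   %si  = bitcast float* %sel to i32*
//   %v   = load i32, i32* %si
//   store i32 %v, i32* %di               ; %di is a bitcast of float* %d
//
// is rewritten to load and store float directly so the bitcasts disappear.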
1313 static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
1314  StoreInst &SI) {
1315  // bitcast?
1316  if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
1317  return false;
1318  // load? integer?
1319  Value *LoadAddr;
1320  if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
1321  return false;
1322  auto *LI = cast<LoadInst>(SI.getValueOperand());
1323  if (!LI->getType()->isIntegerTy())
1324  return false;
1325  if (!isMinMaxWithLoads(LoadAddr))
1326  return false;
1327 
1328  if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
1329  auto *SI = dyn_cast<StoreInst>(U);
1330  return SI && SI->getPointerOperand() != LI &&
1331  peekThroughBitcast(SI->getPointerOperand()) != LoadAddr &&
1332  !SI->getPointerOperand()->isSwiftError();
1333  }))
1334  return false;
1335 
1336  IC.Builder.SetInsertPoint(LI);
1337  LoadInst *NewLI = combineLoadToNewType(
1338  IC, *LI, LoadAddr->getType()->getPointerElementType());
1339  // Replace all the stores with stores of the newly loaded value.
1340  for (auto *UI : LI->users()) {
1341  auto *USI = cast<StoreInst>(UI);
1342  IC.Builder.SetInsertPoint(USI);
1343  combineStoreToNewValue(IC, *USI, NewLI);
1344  }
1345  IC.replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
1346  IC.eraseInstFromFunction(*LI);
1347  return true;
1348 }
1349 
1350 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
1351  Value *Val = SI.getOperand(0);
1352  Value *Ptr = SI.getOperand(1);
1353 
1354  // Try to canonicalize the stored type.
1355  if (combineStoreToValueType(*this, SI))
1356  return eraseInstFromFunction(SI);
1357 
1358  // Attempt to improve the alignment.
1359  const Align KnownAlign = Align(getOrEnforceKnownAlignment(
1360  Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT));
1361  const MaybeAlign StoreAlign = MaybeAlign(SI.getAlignment());
1362  const Align EffectiveStoreAlign =
1363  StoreAlign ? *StoreAlign : Align(DL.getABITypeAlignment(Val->getType()));
1364 
1365  if (KnownAlign > EffectiveStoreAlign)
1366  SI.setAlignment(KnownAlign);
1367  else if (!StoreAlign)
1368  SI.setAlignment(EffectiveStoreAlign);
1369 
1370  // Try to canonicalize the stored type.
1371  if (unpackStoreToAggregate(*this, SI))
1372  return eraseInstFromFunction(SI);
1373 
1374  if (removeBitcastsFromLoadStoreOnMinMax(*this, SI))
1375  return eraseInstFromFunction(SI);
1376 
1377  // Replace GEP indices if possible.
1378  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
1379  Worklist.Add(NewGEPI);
1380  return &SI;
1381  }
1382 
1383  // Don't hack volatile/ordered stores.
1384  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1385  if (!SI.isUnordered()) return nullptr;
1386 
1387  // If the RHS is an alloca with a single use, zapify the store, making the
1388  // alloca dead.
1389  if (Ptr->hasOneUse()) {
1390  if (isa<AllocaInst>(Ptr))
1391  return eraseInstFromFunction(SI);
1392  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1393  if (isa<AllocaInst>(GEP->getOperand(0))) {
1394  if (GEP->getOperand(0)->hasOneUse())
1395  return eraseInstFromFunction(SI);
1396  }
1397  }
1398  }
1399 
1400  // If we have a store to a location which is known constant, we can conclude
1401  // that the store must be storing the constant value (else the memory
1402  // wouldn't be constant), and this must be a noop.
1403  if (AA->pointsToConstantMemory(Ptr))
1404  return eraseInstFromFunction(SI);
1405 
1406  // Do really simple DSE, to catch cases where there are several consecutive
1407  // stores to the same location, separated by a few arithmetic operations. This
1408  // situation often occurs with bitfield accesses.
1409  BasicBlock::iterator BBI(SI);
1410  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1411  --ScanInsts) {
1412  --BBI;
1413  // Don't count debug info directives, lest they affect codegen,
1414  // and we skip pointer-to-pointer bitcasts, which are NOPs.
1415  if (isa<DbgInfoIntrinsic>(BBI) ||
1416  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1417  ScanInsts++;
1418  continue;
1419  }
1420 
1421  if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1422  // Prev store isn't volatile, and stores to the same location?
1423  if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
1424  SI.getOperand(1))) {
1425  ++NumDeadStore;
1426  ++BBI;
1427  eraseInstFromFunction(*PrevSI);
1428  continue;
1429  }
1430  break;
1431  }
1432 
1433  // If this is a load, we have to stop. However, if the loaded value is from
1434  // the pointer we're loading and is producing the pointer we're storing,
1435  // then *this* store is dead (X = load P; store X -> P).
1436  if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1437  if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1438  assert(SI.isUnordered() && "can't eliminate ordering operation");
1439  return eraseInstFromFunction(SI);
1440  }
1441 
1442  // Otherwise, this is a load from some other location. Stores before it
1443  // may not be dead.
1444  break;
1445  }
1446 
1447  // Don't skip over loads, throws or things that can modify memory.
1448  if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1449  break;
1450  }
1451 
1452  // store X, null -> turns into 'unreachable' in SimplifyCFG
1453  // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1454  if (canSimplifyNullStoreOrGEP(SI)) {
1455  if (!isa<UndefValue>(Val)) {
1456  SI.setOperand(0, UndefValue::get(Val->getType()));
1457  if (Instruction *U = dyn_cast<Instruction>(Val))
1458  Worklist.Add(U); // Dropped a use.
1459  }
1460  return nullptr; // Do not modify these!
1461  }
1462 
1463  // store undef, Ptr -> noop
1464  if (isa<UndefValue>(Val))
1465  return eraseInstFromFunction(SI);
1466 
1467  // If this store is the second-to-last instruction in the basic block
1468  // (excluding debug info and bitcasts of pointers) and if the block ends with
1469  // an unconditional branch, try to move the store to the successor block.
1470  BBI = SI.getIterator();
1471  do {
1472  ++BBI;
1473  } while (isa<DbgInfoIntrinsic>(BBI) ||
1474  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
1475 
1476  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
1477  if (BI->isUnconditional())
1478  mergeStoreIntoSuccessor(SI);
1479 
1480  return nullptr;
1481 }
1482 
1483 /// Try to transform:
1484 /// if () { *P = v1; } else { *P = v2 }
1485 /// or:
1486 /// *P = v1; if () { *P = v2; }
1487 /// into a phi node with a store in the successor.
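// CFG sketch of the shapes handled (illustrative, not from the source):
//
//   if.then:                    if.else:
//     store i32 %v1, i32* %P      store i32 %v2, i32* %P
//     br label %merge             br label %merge
//   merge:
//     ...
//
// becomes a phi of %v1/%v2 in %merge followed by a single store to %P,
// provided nothing between the stores and %merge can read or clobber *%P.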
1488 bool InstCombiner::mergeStoreIntoSuccessor(StoreInst &SI) {
1489  assert(SI.isUnordered() &&
1490  "This code has not been audited for volatile or ordered store case.");
1491 
1492  // Check if the successor block has exactly 2 incoming edges.
1493  BasicBlock *StoreBB = SI.getParent();
1494  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1495  if (!DestBB->hasNPredecessors(2))
1496  return false;
1497 
1498  // Capture the other block (the block that doesn't contain our store).
1499  pred_iterator PredIter = pred_begin(DestBB);
1500  if (*PredIter == StoreBB)
1501  ++PredIter;
1502  BasicBlock *OtherBB = *PredIter;
1503 
1504  // Bail out if the relevant blocks aren't all distinct. This can happen,
1505  // for example, if SI is in an infinite loop.
1506  if (StoreBB == DestBB || OtherBB == DestBB)
1507  return false;
1508 
1509  // Verify that the other block ends in a branch and is not otherwise empty.
1510  BasicBlock::iterator BBI(OtherBB->getTerminator());
1511  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1512  if (!OtherBr || BBI == OtherBB->begin())
1513  return false;
1514 
1515  // If the other block ends in an unconditional branch, check for the 'if then
1516  // else' case. There is an instruction before the branch.
1517  StoreInst *OtherStore = nullptr;
1518  if (OtherBr->isUnconditional()) {
1519  --BBI;
1520  // Skip over debugging info.
1521  while (isa<DbgInfoIntrinsic>(BBI) ||
1522  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1523  if (BBI==OtherBB->begin())
1524  return false;
1525  --BBI;
1526  }
1527  // If this isn't a store, isn't a store to the same location, or is not the
1528  // right kind of store, bail out.
1529  OtherStore = dyn_cast<StoreInst>(BBI);
1530  if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
1531  !SI.isSameOperationAs(OtherStore))
1532  return false;
1533  } else {
1534  // Otherwise, the other block ended with a conditional branch. If one of the
1535  // destinations is StoreBB, then we have the if/then case.
1536  if (OtherBr->getSuccessor(0) != StoreBB &&
1537  OtherBr->getSuccessor(1) != StoreBB)
1538  return false;
1539 
1540  // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1541  // if/then triangle. See if there is a store to the same ptr as SI that
1542  // lives in OtherBB.
1543  for (;; --BBI) {
1544  // Check to see if we find the matching store.
1545  if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
1546  if (OtherStore->getOperand(1) != SI.getOperand(1) ||
1547  !SI.isSameOperationAs(OtherStore))
1548  return false;
1549  break;
1550  }
1551  // If we find something that may be using or overwriting the stored
1552  // value, or if we run out of instructions, we can't do the transform.
1553  if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1554  BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1555  return false;
1556  }
1557 
1558  // In order to eliminate the store in OtherBB, we have to make sure nothing
1559  // reads or overwrites the stored value in StoreBB.
1560  for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1561  // FIXME: This should really be AA driven.
1562  if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1563  return false;
1564  }
1565  }
1566 
1567  // Insert a PHI node now if we need it.
1568  Value *MergedVal = OtherStore->getOperand(0);
1569  // The debug locations of the original instructions might differ. Merge them.
1570  DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
1571  OtherStore->getDebugLoc());
1572  if (MergedVal != SI.getOperand(0)) {
1573  PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
1574  PN->addIncoming(SI.getOperand(0), SI.getParent());
1575  PN->addIncoming(OtherStore->getOperand(0), OtherBB);
1576  MergedVal = InsertNewInstBefore(PN, DestBB->front());
1577  PN->setDebugLoc(MergedLoc);
1578  }
1579 
1580  // Advance to a place where it is safe to insert the new store and insert it.
1581  BBI = DestBB->getFirstInsertionPt();
1582  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(),
1583  MaybeAlign(SI.getAlignment()),
1584  SI.getOrdering(), SI.getSyncScopeID());
1585  InsertNewInstBefore(NewSI, *BBI);
1586  NewSI->setDebugLoc(MergedLoc);
1587 
1588  // If the two stores had AA tags, merge them.
1589  AAMDNodes AATags;
1590  SI.getAAMetadata(AATags);
1591  if (AATags) {
1592  OtherStore->getAAMetadata(AATags, /* Merge = */ true);
1593  NewSI->setAAMetadata(AATags);
1594  }
1595 
1596  // Nuke the old stores.
1597  eraseInstFromFunction(SI);
1598  eraseInstFromFunction(*OtherStore);
1599  return true;
1600 }
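
For context, here is a source-level C++ sketch of the two control-flow shapes this routine recognizes; the function and parameter names are illustrative only and do not appear in the LLVM sources. In both shapes the per-path stores are removed and the common successor ends up with a single store of a "storemerge" PHI of the two values:

// Illustrative functions only; the transform itself runs on LLVM IR.

// "if/then/else" diamond: one store on each side of the branch. Both are
// sunk into the join block as a single store of a PHI of V1 and V2.
void diamond(int *P, bool Cond, int V1, int V2) {
  if (Cond)
    *P = V1;   // store in one predecessor of the join block
  else
    *P = V2;   // store in the other predecessor (OtherBB)
}

// "if/then" triangle: an unconditional store that may be overwritten on the
// taken path. Both stores are again replaced by one store in the join block.
void triangle(int *P, bool Cond, int V1, int V2) {
  *P = V1;     // store in OtherBB, which conditionally branches around...
  if (Cond)
    *P = V2;   // ...the store in StoreBB; the join block gets the merged store
}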