LLVM  10.0.0svn
InstCombineLoadStoreAlloca.cpp
1 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the visit functions for load, store and alloca.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "InstCombineInternal.h"
14 #include "llvm/ADT/MapVector.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/Loads.h"
19 #include "llvm/IR/ConstantRange.h"
20 #include "llvm/IR/DataLayout.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/LLVMContext.h"
24 #include "llvm/IR/MDBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
27 using namespace llvm;
28 using namespace PatternMatch;
29 
30 #define DEBUG_TYPE "instcombine"
31 
32 STATISTIC(NumDeadStore, "Number of dead stores eliminated");
33 STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
34 
35 /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
36 /// some part of a constant global variable. This intentionally only accepts
37 /// constant expressions because we can't rewrite arbitrary instructions.
38 static bool pointsToConstantGlobal(Value *V) {
39  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
40  return GV->isConstant();
41 
42  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
43  if (CE->getOpcode() == Instruction::BitCast ||
44  CE->getOpcode() == Instruction::AddrSpaceCast ||
45  CE->getOpcode() == Instruction::GetElementPtr)
46  return pointsToConstantGlobal(CE->getOperand(0));
47  }
48  return false;
49 }
50 
51 /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
52 /// pointer to an alloca. Ignore any reads of the pointer, return false if we
53 /// see any stores or other unknown uses. If we see pointer arithmetic, keep
54 /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
55 /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
56 /// the alloca, and if the source pointer is a pointer to a constant global, we
57 /// can optimize this.
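/// As an illustrative sketch (the IR names here are hypothetical), the
/// situation being looked for is roughly:
///
///   @G = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
///   ...
///   %buf = alloca [4 x i32]
///   %dst = bitcast [4 x i32]* %buf to i8*
///   %src = bitcast [4 x i32]* @G to i8*
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 16, i1 false)
///   ; ... all remaining uses of %buf are loads ...
///
/// in which case reads of %buf can be redirected to @G.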
58 static bool
59 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
60                                SmallVectorImpl<Instruction *> &ToDelete) {
61  // We track lifetime intrinsics as we encounter them. If we decide to go
62  // ahead and replace the value with the global, this lets the caller quickly
63  // eliminate the markers.
64 
65  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
66  ValuesToInspect.emplace_back(V, false);
67  while (!ValuesToInspect.empty()) {
68  auto ValuePair = ValuesToInspect.pop_back_val();
69  const bool IsOffset = ValuePair.second;
70  for (auto &U : ValuePair.first->uses()) {
71  auto *I = cast<Instruction>(U.getUser());
72 
73  if (auto *LI = dyn_cast<LoadInst>(I)) {
74  // Ignore non-volatile loads, they are always ok.
75  if (!LI->isSimple()) return false;
76  continue;
77  }
78 
79  if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
80  // If uses of the bitcast are ok, we are ok.
81  ValuesToInspect.emplace_back(I, IsOffset);
82  continue;
83  }
84  if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
85  // If the GEP has all zero indices, it doesn't offset the pointer;
86  // otherwise it does.
87  ValuesToInspect.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
88  continue;
89  }
90 
91  if (auto *Call = dyn_cast<CallBase>(I)) {
92  // If this is the function being called then we treat it like a load and
93  // ignore it.
94  if (Call->isCallee(&U))
95  continue;
96 
97  unsigned DataOpNo = Call->getDataOperandNo(&U);
98  bool IsArgOperand = Call->isArgOperand(&U);
99 
100  // Inalloca arguments are clobbered by the call.
101  if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
102  return false;
103 
104  // If this is a readonly/readnone call site, then we know it is just a
105  // load (but one that potentially returns the value itself), so we can
106  // ignore it if we know that the value isn't captured.
107  if (Call->onlyReadsMemory() &&
108  (Call->use_empty() || Call->doesNotCapture(DataOpNo)))
109  continue;
110 
111  // If this is being passed as a byval argument, the caller is making a
112  // copy, so it is only a read of the alloca.
113  if (IsArgOperand && Call->isByValArgument(DataOpNo))
114  continue;
115  }
116 
117  // Lifetime intrinsics can be handled by the caller.
118  if (I->isLifetimeStartOrEnd()) {
119  assert(I->use_empty() && "Lifetime markers have no result to use!");
120  ToDelete.push_back(I);
121  continue;
122  }
123 
124  // If this isn't our memcpy/memmove, reject it as something we can't
125  // handle.
126  MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
127  if (!MI)
128  return false;
129 
130  // If the transfer is using the alloca as a source of the transfer, then
131  // ignore it since it is a load (unless the transfer is volatile).
132  if (U.getOperandNo() == 1) {
133  if (MI->isVolatile()) return false;
134  continue;
135  }
136 
137  // If we already have seen a copy, reject the second one.
138  if (TheCopy) return false;
139 
140  // If the pointer has been offset from the start of the alloca, we can't
141  // safely handle this.
142  if (IsOffset) return false;
143 
144  // If the memintrinsic isn't using the alloca as the dest, reject it.
145  if (U.getOperandNo() != 0) return false;
146 
147  // If the source of the memcpy/move is not a constant global, reject it.
148  if (!pointsToConstantGlobal(MI->getSource()))
149  return false;
150 
151  // Otherwise, the transform is safe. Remember the copy instruction.
152  TheCopy = MI;
153  }
154  }
155  return true;
156 }
157 
158 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
159 /// modified by a copy from a constant global. If we can prove this, we can
160 /// replace any uses of the alloca with uses of the global directly.
161 static MemTransferInst *
162 isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
163                                SmallVectorImpl<Instruction *> &ToDelete) {
164  MemTransferInst *TheCopy = nullptr;
165  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
166  return TheCopy;
167  return nullptr;
168 }
169 
170 /// Returns true if V is dereferenceable for size of alloca.
171 static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
172  const DataLayout &DL) {
173  if (AI->isArrayAllocation())
174  return false;
175  uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
176  if (!AllocaSize)
177  return false;
178  return isDereferenceableAndAlignedPointer(V, AI->getAlignment(),
179                                            APInt(64, AllocaSize), DL);
180 }
181 
182 static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
183  // Check for array size of 1 (scalar allocation).
184  if (!AI.isArrayAllocation()) {
185  // i32 1 is the canonical array size for scalar allocations.
186  if (AI.getArraySize()->getType()->isIntegerTy(32))
187  return nullptr;
188 
189  // Canonicalize it.
190  Value *V = IC.Builder.getInt32(1);
191  AI.setOperand(0, V);
192  return &AI;
193  }
194 
195  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
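  // As a hypothetical illustration, something like:
  //   %buf = alloca i32, i32 4, align 4
  // becomes:
  //   %buf1 = alloca [4 x i32], align 4
  //   %buf1.sub = getelementptr inbounds [4 x i32], [4 x i32]* %buf1, i64 0, i64 0
  // and every use of the original alloca is rewritten to use %buf1.sub.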
196  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
197  if (C->getValue().getActiveBits() <= 64) {
198  Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
199  AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
200  New->setAlignment(AI.getAlignment());
201 
202  // Scan to the end of the allocation instructions, to skip over a block of
203  // allocas if possible...also skip interleaved debug info
204  //
205  BasicBlock::iterator It(New);
206  while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
207  ++It;
208 
209  // Now that I is pointing to the first non-allocation-inst in the block,
210  // insert our getelementptr instruction...
211  //
212  Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
213  Value *NullIdx = Constant::getNullValue(IdxTy);
214  Value *Idx[2] = {NullIdx, NullIdx};
215  Instruction *GEP = GetElementPtrInst::CreateInBounds(
216      NewTy, New, Idx, New->getName() + ".sub");
217  IC.InsertNewInstBefore(GEP, *It);
218 
219  // Now make everything use the getelementptr instead of the original
220  // allocation.
221  return IC.replaceInstUsesWith(AI, GEP);
222  }
223  }
224 
225  if (isa<UndefValue>(AI.getArraySize()))
226  return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
227 
228  // Ensure that the alloca array size argument has type intptr_t, so that
229  // any casting is exposed early.
230  Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
231  if (AI.getArraySize()->getType() != IntPtrTy) {
232  Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
233  AI.setOperand(0, V);
234  return &AI;
235  }
236 
237  return nullptr;
238 }
239 
240 namespace {
241 // If I and V are pointers in different address space, it is not allowed to
242 // use replaceAllUsesWith since I and V have different types. A
243 // non-target-specific transformation should not use addrspacecast on V since
244 // the two address space may be disjoint depending on target.
245 //
246 // This class chases down uses of the old pointer until reaching the load
247 // instructions, then replaces the old pointer in the load instructions with
248 // the new pointer. If during the chasing it sees bitcast or GEP, it will
249 // create new bitcast or GEP with the new pointer and use them in the load
250 // instruction.
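// As a sketch of the case this handles (hypothetical IR), suppose an alloca in
// one address space is being replaced by a constant global in another:
//
//   %buf = alloca i32, addrspace(5)
//   %p = bitcast i32 addrspace(5)* %buf to float addrspace(5)*
//   %v = load float, float addrspace(5)* %p
//
// The bitcast is recreated on the replacement pointer in its own address space
// and only the load is rewritten, so no addrspacecast is ever introduced.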
251 class PointerReplacer {
252 public:
253  PointerReplacer(InstCombiner &IC) : IC(IC) {}
254  void replacePointer(Instruction &I, Value *V);
255 
256 private:
257  void findLoadAndReplace(Instruction &I);
258  void replace(Instruction *I);
259  Value *getReplacement(Value *I);
260 
261  SmallVector<Instruction *, 4> Path;
262  MapVector<Value *, Value *> WorkMap;
263  InstCombiner &IC;
264 };
265 } // end anonymous namespace
266 
267 void PointerReplacer::findLoadAndReplace(Instruction &I) {
268  for (auto U : I.users()) {
269  auto *Inst = dyn_cast<Instruction>(&*U);
270  if (!Inst)
271  return;
272  LLVM_DEBUG(dbgs() << "Found pointer user: " << *U << '\n');
273  if (isa<LoadInst>(Inst)) {
274  for (auto P : Path)
275  replace(P);
276  replace(Inst);
277  } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
278  Path.push_back(Inst);
279  findLoadAndReplace(*Inst);
280  Path.pop_back();
281  } else {
282  return;
283  }
284  }
285 }
286 
287 Value *PointerReplacer::getReplacement(Value *V) {
288  auto Loc = WorkMap.find(V);
289  if (Loc != WorkMap.end())
290  return Loc->second;
291  return nullptr;
292 }
293 
294 void PointerReplacer::replace(Instruction *I) {
295  if (getReplacement(I))
296  return;
297 
298  if (auto *LT = dyn_cast<LoadInst>(I)) {
299  auto *V = getReplacement(LT->getPointerOperand());
300  assert(V && "Operand not replaced");
301  auto *NewI = new LoadInst(I->getType(), V);
302  NewI->takeName(LT);
303  IC.InsertNewInstWith(NewI, *LT);
304  IC.replaceInstUsesWith(*LT, NewI);
305  WorkMap[LT] = NewI;
306  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
307  auto *V = getReplacement(GEP->getPointerOperand());
308  assert(V && "Operand not replaced");
309  SmallVector<Value *, 8> Indices;
310  Indices.append(GEP->idx_begin(), GEP->idx_end());
311  auto *NewI = GetElementPtrInst::Create(
312  V->getType()->getPointerElementType(), V, Indices);
313  IC.InsertNewInstWith(NewI, *GEP);
314  NewI->takeName(GEP);
315  WorkMap[GEP] = NewI;
316  } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
317  auto *V = getReplacement(BC->getOperand(0));
318  assert(V && "Operand not replaced");
319  auto *NewT = PointerType::get(BC->getType()->getPointerElementType(),
320                                 V->getType()->getPointerAddressSpace());
321  auto *NewI = new BitCastInst(V, NewT);
322  IC.InsertNewInstWith(NewI, *BC);
323  NewI->takeName(BC);
324  WorkMap[BC] = NewI;
325  } else {
326  llvm_unreachable("should never reach here");
327  }
328 }
329 
330 void PointerReplacer::replacePointer(Instruction &I, Value *V) {
331 #ifndef NDEBUG
332  auto *PT = cast<PointerType>(I.getType());
333  auto *NT = cast<PointerType>(V->getType());
334  assert(PT != NT && PT->getElementType() == NT->getElementType() &&
335  "Invalid usage");
336 #endif
337  WorkMap[&I] = V;
338  findLoadAndReplace(I);
339 }
340 
341 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
342  if (auto *I = simplifyAllocaArraySize(*this, AI))
343  return I;
344 
345  if (AI.getAllocatedType()->isSized()) {
346  // If the alignment is 0 (unspecified), assign it the preferred alignment.
347  if (AI.getAlignment() == 0)
348  AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
349 
350  // Move all alloca's of zero byte objects to the entry block and merge them
351  // together. Note that we only do this for alloca's, because malloc should
352  // allocate and return a unique pointer, even for a zero byte allocation.
353  if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
354  // For a zero sized alloca there is no point in doing an array allocation.
355  // This is helpful if the array size is a complicated expression not used
356  // elsewhere.
357  if (AI.isArrayAllocation()) {
358  AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
359  return &AI;
360  }
361 
362  // Get the first instruction in the entry block.
363  BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
364  Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
365  if (FirstInst != &AI) {
366  // If the entry block doesn't start with a zero-size alloca then move
367  // this one to the start of the entry block. There is no problem with
368  // dominance as the array size was forced to a constant earlier already.
369  AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
370  if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
371  DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
372  AI.moveBefore(FirstInst);
373  return &AI;
374  }
375 
376  // If the alignment of the entry block alloca is 0 (unspecified),
377  // assign it the preferred alignment.
378  if (EntryAI->getAlignment() == 0)
379  EntryAI->setAlignment(
380  DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
381  // Replace this zero-sized alloca with the one at the start of the entry
382  // block after ensuring that the address will be aligned enough for both
383  // types.
384  unsigned MaxAlign = std::max(EntryAI->getAlignment(),
385  AI.getAlignment());
386  EntryAI->setAlignment(MaxAlign);
387  if (AI.getType() != EntryAI->getType())
388  return new BitCastInst(EntryAI, AI.getType());
389  return replaceInstUsesWith(AI, EntryAI);
390  }
391  }
392  }
393 
394  if (AI.getAlignment()) {
395  // Check to see if this allocation is only modified by a memcpy/memmove from
396  // a constant global whose alignment is equal to or exceeds that of the
397  // allocation. If this is the case, we can change all users to use
398  // the constant global instead. This is commonly produced by the CFE by
399  // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
400  // is only subsequently read.
401  SmallVector<Instruction *, 4> ToDelete;
402  if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
403  unsigned SourceAlign = getOrEnforceKnownAlignment(
404  Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
405  if (AI.getAlignment() <= SourceAlign &&
406  isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
407  LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
408  LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
409  for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
410  eraseInstFromFunction(*ToDelete[i]);
411  Constant *TheSrc = cast<Constant>(Copy->getSource());
412  auto *SrcTy = TheSrc->getType();
413  auto *DestTy = PointerType::get(AI.getType()->getPointerElementType(),
414  SrcTy->getPointerAddressSpace());
415  Constant *Cast =
416      ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, DestTy);
417  if (AI.getType()->getPointerAddressSpace() ==
418  SrcTy->getPointerAddressSpace()) {
419  Instruction *NewI = replaceInstUsesWith(AI, Cast);
420  eraseInstFromFunction(*Copy);
421  ++NumGlobalCopies;
422  return NewI;
423  } else {
424  PointerReplacer PtrReplacer(*this);
425  PtrReplacer.replacePointer(AI, Cast);
426  ++NumGlobalCopies;
427  }
428  }
429  }
430  }
431 
432  // At last, use the generic allocation site handler to aggressively remove
433  // unused allocas.
434  return visitAllocSite(AI);
435 }
436 
437 // Are we allowed to form a atomic load or store of this type?
438 static bool isSupportedAtomicType(Type *Ty) {
439  return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
440 }
441 
442 /// Helper to combine a load to a new type.
443 ///
444 /// This just does the work of combining a load to a new type. It handles
445 /// metadata, etc., and returns the new instruction. The \c NewTy should be the
446 /// loaded *value* type. This will convert it to a pointer, cast the operand to
447 /// that pointer type, load it, etc.
448 ///
449 /// Note that this will create all of the instructions with whatever insert
450 /// point the \c InstCombiner currently is using.
451 static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
452                                       const Twine &Suffix = "") {
453  assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
454  "can't fold an atomic load to requested type");
455 
456  Value *Ptr = LI.getPointerOperand();
457  unsigned AS = LI.getPointerAddressSpace();
458  Value *NewPtr = nullptr;
459  if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
460  NewPtr->getType()->getPointerElementType() == NewTy &&
461  NewPtr->getType()->getPointerAddressSpace() == AS))
462  NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
463 
464  LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
465  NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
466  NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
467  copyMetadataForLoad(*NewLoad, LI);
468  return NewLoad;
469 }
470 
471 /// Combine a store to a new type.
472 ///
473 /// Returns the newly created store instruction.
474 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
475  assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
476  "can't fold an atomic store of requested type");
477 
478  Value *Ptr = SI.getPointerOperand();
479  unsigned AS = SI.getPointerAddressSpace();
480  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
481  SI.getAllMetadata(MD);
482 
483  StoreInst *NewStore = IC.Builder.CreateAlignedStore(
484  V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
485  SI.getAlignment(), SI.isVolatile());
486  NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
487  for (const auto &MDPair : MD) {
488  unsigned ID = MDPair.first;
489  MDNode *N = MDPair.second;
490  // Note, essentially every kind of metadata should be preserved here! This
491  // routine is supposed to clone a store instruction changing *only its
492  // type*. The only metadata it makes sense to drop is metadata which is
493  // invalidated when the pointer type changes. This should essentially
494  // never be the case in LLVM, but we explicitly switch over only known
495  // metadata to be conservatively correct. If you are adding metadata to
496  // LLVM which pertains to stores, you almost certainly want to add it
497  // here.
498  switch (ID) {
499  case LLVMContext::MD_dbg:
500  case LLVMContext::MD_tbaa:
501  case LLVMContext::MD_prof:
502  case LLVMContext::MD_fpmath:
503  case LLVMContext::MD_tbaa_struct:
504  case LLVMContext::MD_alias_scope:
505  case LLVMContext::MD_noalias:
506  case LLVMContext::MD_nontemporal:
507  case LLVMContext::MD_mem_parallel_loop_access:
508  case LLVMContext::MD_access_group:
509  // All of these directly apply.
510  NewStore->setMetadata(ID, N);
511  break;
512  case LLVMContext::MD_invariant_load:
513  case LLVMContext::MD_nonnull:
514  case LLVMContext::MD_range:
515  case LLVMContext::MD_align:
516  case LLVMContext::MD_dereferenceable:
517  case LLVMContext::MD_dereferenceable_or_null:
518  // These don't apply for stores.
519  break;
520  }
521  }
522 
523  return NewStore;
524 }
525 
526 /// Returns true if the instruction represents a minmax pattern like:
527 /// select ((cmp load V1, load V2), V1, V2).
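/// A hypothetical instance of the pattern, selecting the address of the
/// smaller of two values:
///
///   %v1 = load i32, i32* %p1
///   %v2 = load i32, i32* %p2
///   %cmp = icmp slt i32 %v1, %v2
///   %min.addr = select i1 %cmp, i32* %p1, i32* %p2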
528 static bool isMinMaxWithLoads(Value *V) {
529  assert(V->getType()->isPointerTy() && "Expected pointer type.");
530  // Ignore possible ty* to ixx* bitcast.
531  V = peekThroughBitcast(V);
532  // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
533  // pattern.
534  CmpInst::Predicate Pred;
535  Instruction *L1;
536  Instruction *L2;
537  Value *LHS;
538  Value *RHS;
539  if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
540  m_Value(LHS), m_Value(RHS))))
541  return false;
542  return (match(L1, m_Load(m_Specific(LHS))) &&
543  match(L2, m_Load(m_Specific(RHS)))) ||
544  (match(L1, m_Load(m_Specific(RHS))) &&
545  match(L2, m_Load(m_Specific(LHS))));
546 }
547 
548 /// Combine loads to match the type of their uses' value after looking
549 /// through intervening bitcasts.
550 ///
551 /// The core idea here is that if the result of a load is used in an operation,
552 /// we should load the type most conducive to that operation. For example, when
553 /// loading an integer and converting that immediately to a pointer, we should
554 /// instead directly load a pointer.
555 ///
556 /// However, this routine must never change the width of a load or the number of
557 /// loads as that would introduce a semantic change. This combine is expected to
558 /// be a semantic no-op which just allows loads to more closely model the types
559 /// of their consuming operations.
560 ///
561 /// Currently, we also refuse to change the precise type used for an atomic load
562 /// or a volatile load. This is debatable, and might be reasonable to change
563 /// later. However, it is risky in case some backend or other part of LLVM is
564 /// relying on the exact type loaded to select appropriate atomic operations.
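/// As a hypothetical example, on a target with 64-bit pointers a load whose
/// only use is a no-op cast, such as:
///
///   %i = load i64, i64* %p
///   %q = inttoptr i64 %i to i8*
///
/// can instead load the pointer type directly:
///
///   %p.cast = bitcast i64* %p to i8**
///   %q = load i8*, i8** %p.cast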
565 static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
566  // FIXME: We could probably with some care handle both volatile and ordered
567  // atomic loads here but it isn't clear that this is important.
568  if (!LI.isUnordered())
569  return nullptr;
570 
571  if (LI.use_empty())
572  return nullptr;
573 
574  // swifterror values can't be bitcasted.
575  if (LI.getPointerOperand()->isSwiftError())
576  return nullptr;
577 
578  Type *Ty = LI.getType();
579  const DataLayout &DL = IC.getDataLayout();
580 
581  // Try to canonicalize loads which are only ever stored to operate over
582  // integers instead of any other type. We only do this when the loaded type
583  // is sized and has a size exactly the same as its store size and the store
584  // size is a legal integer type.
585  // Do not perform canonicalization if minmax pattern is found (to avoid
586  // infinite loop).
587  if (!Ty->isIntegerTy() && Ty->isSized() &&
588      DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
589      DL.typeSizeEqualsStoreSize(Ty) &&
590      !DL.isNonIntegralPointerType(Ty) &&
591      !isMinMaxWithLoads(
592          peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
593  if (all_of(LI.users(), [&LI](User *U) {
594  auto *SI = dyn_cast<StoreInst>(U);
595  return SI && SI->getPointerOperand() != &LI &&
596  !SI->getPointerOperand()->isSwiftError();
597  })) {
598  LoadInst *NewLoad = combineLoadToNewType(
599  IC, LI,
600  Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
601  // Replace all the stores with stores of the newly loaded value.
602  for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
603  auto *SI = cast<StoreInst>(*UI++);
604  IC.Builder.SetInsertPoint(SI);
605  combineStoreToNewValue(IC, *SI, NewLoad);
606  IC.eraseInstFromFunction(*SI);
607  }
608  assert(LI.use_empty() && "Failed to remove all users of the load!");
609  // Return the old load so the combiner can delete it safely.
610  return &LI;
611  }
612  }
613 
614  // Fold away bit casts of the loaded value by loading the desired type.
615  // We can do this for BitCastInsts as well as casts from and to pointer types,
616  // as long as those are noops (i.e., the source or dest type have the same
617  // bitwidth as the target's pointers).
618  if (LI.hasOneUse())
619  if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
620  if (CI->isNoopCast(DL))
621  if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
622  LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
623  CI->replaceAllUsesWith(NewLoad);
624  IC.eraseInstFromFunction(*CI);
625  return &LI;
626  }
627 
628  // FIXME: We should also canonicalize loads of vectors when their elements are
629  // cast to other types.
630  return nullptr;
631 }
632 
633 static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
634  // FIXME: We could probably with some care handle both volatile and atomic
635  // loads here but it isn't clear that this is important.
636  if (!LI.isSimple())
637  return nullptr;
638 
639  Type *T = LI.getType();
640  if (!T->isAggregateType())
641  return nullptr;
642 
643  StringRef Name = LI.getName();
644  assert(LI.getAlignment() && "Alignment must be set at this point");
645 
646  if (auto *ST = dyn_cast<StructType>(T)) {
647  // If the struct only has one element, we unpack it.
648  auto NumElements = ST->getNumElements();
649  if (NumElements == 1) {
650  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
651  ".unpack");
652  AAMDNodes AAMD;
653  LI.getAAMetadata(AAMD);
654  NewLoad->setAAMetadata(AAMD);
655  return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
656  UndefValue::get(T), NewLoad, 0, Name));
657  }
658 
659  // We don't want to break loads with padding here as we'd lose
660  // the knowledge that padding exists for the rest of the pipeline.
661  const DataLayout &DL = IC.getDataLayout();
662  auto *SL = DL.getStructLayout(ST);
663  if (SL->hasPadding())
664  return nullptr;
665 
666  auto Align = LI.getAlignment();
667  if (!Align)
668  Align = DL.getABITypeAlignment(ST);
669 
670  auto *Addr = LI.getPointerOperand();
671  auto *IdxType = Type::getInt32Ty(T->getContext());
672  auto *Zero = ConstantInt::get(IdxType, 0);
673 
674  Value *V = UndefValue::get(T);
675  for (unsigned i = 0; i < NumElements; i++) {
676  Value *Indices[2] = {
677  Zero,
678  ConstantInt::get(IdxType, i),
679  };
680  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
681  Name + ".elt");
682  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
683  auto *L = IC.Builder.CreateAlignedLoad(ST->getElementType(i), Ptr,
684  EltAlign, Name + ".unpack");
685  // Propagate AA metadata. It'll still be valid on the narrowed load.
686  AAMDNodes AAMD;
687  LI.getAAMetadata(AAMD);
688  L->setAAMetadata(AAMD);
689  V = IC.Builder.CreateInsertValue(V, L, i);
690  }
691 
692  V->setName(Name);
693  return IC.replaceInstUsesWith(LI, V);
694  }
695 
696  if (auto *AT = dyn_cast<ArrayType>(T)) {
697  auto *ET = AT->getElementType();
698  auto NumElements = AT->getNumElements();
699  if (NumElements == 1) {
700  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
701  AAMDNodes AAMD;
702  LI.getAAMetadata(AAMD);
703  NewLoad->setAAMetadata(AAMD);
704  return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
705  UndefValue::get(T), NewLoad, 0, Name));
706  }
707 
708  // Bail out if the array is too large. Ideally we would like to optimize
709  // arrays of arbitrary size but this has a terrible impact on compile time.
710  // The threshold here is chosen arbitrarily, maybe needs a little bit of
711  // tuning.
712  if (NumElements > IC.MaxArraySizeForCombine)
713  return nullptr;
714 
715  const DataLayout &DL = IC.getDataLayout();
716  auto EltSize = DL.getTypeAllocSize(ET);
717  auto Align = LI.getAlignment();
718  if (!Align)
719  Align = DL.getABITypeAlignment(T);
720 
721  auto *Addr = LI.getPointerOperand();
722  auto *IdxType = Type::getInt64Ty(T->getContext());
723  auto *Zero = ConstantInt::get(IdxType, 0);
724 
725  Value *V = UndefValue::get(T);
726  uint64_t Offset = 0;
727  for (uint64_t i = 0; i < NumElements; i++) {
728  Value *Indices[2] = {
729  Zero,
730  ConstantInt::get(IdxType, i),
731  };
732  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
733  Name + ".elt");
734  auto *L = IC.Builder.CreateAlignedLoad(
735  AT->getElementType(), Ptr, MinAlign(Align, Offset), Name + ".unpack");
736  AAMDNodes AAMD;
737  LI.getAAMetadata(AAMD);
738  L->setAAMetadata(AAMD);
739  V = IC.Builder.CreateInsertValue(V, L, i);
740  Offset += EltSize;
741  }
742 
743  V->setName(Name);
744  return IC.replaceInstUsesWith(LI, V);
745  }
746 
747  return nullptr;
748 }
749 
750 // If we can determine that all possible objects pointed to by the provided
751 // pointer value are, not only dereferenceable, but also definitively less than
752 // or equal to the provided maximum size, then return true. Otherwise, return
753 // false (constant global values and allocas fall into this category).
754 //
755 // FIXME: This should probably live in ValueTracking (or similar).
756 static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
757  const DataLayout &DL) {
758  SmallPtrSet<Value *, 4> Visited;
759  SmallVector<Value *, 4> Worklist(1, V);
760 
761  do {
762  Value *P = Worklist.pop_back_val();
763  P = P->stripPointerCasts();
764 
765  if (!Visited.insert(P).second)
766  continue;
767 
768  if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
769  Worklist.push_back(SI->getTrueValue());
770  Worklist.push_back(SI->getFalseValue());
771  continue;
772  }
773 
774  if (PHINode *PN = dyn_cast<PHINode>(P)) {
775  for (Value *IncValue : PN->incoming_values())
776  Worklist.push_back(IncValue);
777  continue;
778  }
779 
780  if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
781  if (GA->isInterposable())
782  return false;
783  Worklist.push_back(GA->getAliasee());
784  continue;
785  }
786 
787  // If we know how big this object is, and it is less than MaxSize, continue
788  // searching. Otherwise, return false.
789  if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
790  if (!AI->getAllocatedType()->isSized())
791  return false;
792 
793  ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
794  if (!CS)
795  return false;
796 
797  uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
798  // Make sure that, even if the multiplication below would wrap as an
799  // uint64_t, we still do the right thing.
800  if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
801  return false;
802  continue;
803  }
804 
805  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
806  if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
807  return false;
808 
809  uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
810  if (InitSize > MaxSize)
811  return false;
812  continue;
813  }
814 
815  return false;
816  } while (!Worklist.empty());
817 
818  return true;
819 }
820 
821 // If we're indexing into an object of a known size, and the outer index is
822 // not a constant, but having any value but zero would lead to undefined
823 // behavior, replace it with zero.
824 //
825 // For example, if we have:
826 // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
827 // ...
828 // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
829 // ... = load i32* %arrayidx, align 4
830 // Then we know that we can replace %x in the GEP with i64 0.
831 //
832 // FIXME: We could fold any GEP index to zero that would cause UB if it were
833 // not zero. Currently, we only handle the first such index. Also, we could
834 // also search through non-zero constant indices if we kept track of the
835 // offsets those indices implied.
836 static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
837                                      Instruction *MemI, unsigned &Idx) {
838  if (GEPI->getNumOperands() < 2)
839  return false;
840 
841  // Find the first non-zero index of a GEP. If all indices are zero, return
842  // one past the last index.
843  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
844  unsigned I = 1;
845  for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
846  Value *V = GEPI->getOperand(I);
847  if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
848  if (CI->isZero())
849  continue;
850 
851  break;
852  }
853 
854  return I;
855  };
856 
857  // Skip through initial 'zero' indices, and find the corresponding pointer
858  // type. See if the next index is not a constant.
859  Idx = FirstNZIdx(GEPI);
860  if (Idx == GEPI->getNumOperands())
861  return false;
862  if (isa<Constant>(GEPI->getOperand(Idx)))
863  return false;
864 
865  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
866  Type *AllocTy =
867    GetElementPtrInst::getIndexedType(GEPI->getSourceElementType(), Ops);
868  if (!AllocTy || !AllocTy->isSized())
869  return false;
870  const DataLayout &DL = IC.getDataLayout();
871  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
872 
873  // If there are more indices after the one we might replace with a zero, make
874  // sure they're all non-negative. If any of them are negative, the overall
875  // address being computed might be before the base address determined by the
876  // first non-zero index.
877  auto IsAllNonNegative = [&]() {
878  for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
879  KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
880  if (Known.isNonNegative())
881  continue;
882  return false;
883  }
884 
885  return true;
886  };
887 
888  // FIXME: If the GEP is not inbounds, and there are extra indices after the
889  // one we'll replace, those could cause the address computation to wrap
890  // (rendering the IsAllNonNegative() check below insufficient). We can do
891  // better, ignoring zero indices (and other indices we can prove small
892  // enough not to wrap).
893  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
894  return false;
895 
896  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
897  // also known to be dereferenceable.
898  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
899  IsAllNonNegative();
900 }
901 
902 // If we're indexing into an object with a variable index for the memory
903 // access, but the object has only one element, we can assume that the index
904 // will always be zero. If we replace the GEP, return it.
905 template <typename T>
906 static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
907                                           T &MemI) {
908  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
909  unsigned Idx;
910  if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
911  Instruction *NewGEPI = GEPI->clone();
912  NewGEPI->setOperand(Idx,
913  ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
914  NewGEPI->insertBefore(GEPI);
915  MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
916  return NewGEPI;
917  }
918  }
919 
920  return nullptr;
921 }
922 
923 static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
924  if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
925  return false;
926 
927  auto *Ptr = SI.getPointerOperand();
928  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
929  Ptr = GEPI->getOperand(0);
930  return (isa<ConstantPointerNull>(Ptr) &&
931  !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
932 }
933 
934 static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
935  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
936  const Value *GEPI0 = GEPI->getOperand(0);
937  if (isa<ConstantPointerNull>(GEPI0) &&
938  !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
939  return true;
940  }
941  if (isa<UndefValue>(Op) ||
942  (isa<ConstantPointerNull>(Op) &&
943  !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
944  return true;
945  return false;
946 }
947 
948 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
949  Value *Op = LI.getOperand(0);
950 
951  // Try to canonicalize the loaded type.
952  if (Instruction *Res = combineLoadToOperationType(*this, LI))
953  return Res;
954 
955  // Attempt to improve the alignment.
956  unsigned KnownAlign = getOrEnforceKnownAlignment(
957  Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT);
958  unsigned LoadAlign = LI.getAlignment();
959  unsigned EffectiveLoadAlign =
960  LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
961 
962  if (KnownAlign > EffectiveLoadAlign)
963  LI.setAlignment(KnownAlign);
964  else if (LoadAlign == 0)
965  LI.setAlignment(EffectiveLoadAlign);
966 
967  // Replace GEP indices if possible.
968  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
969  Worklist.Add(NewGEPI);
970  return &LI;
971  }
972 
973  if (Instruction *Res = unpackLoadToAggregate(*this, LI))
974  return Res;
975 
976  // Do really simple store-to-load forwarding and load CSE, to catch cases
977  // where there are several consecutive memory accesses to the same location,
978  // separated by a few arithmetic operations.
979  BasicBlock::iterator BBI(LI);
980  bool IsLoadCSE = false;
981  if (Value *AvailableVal = FindAvailableLoadedValue(
982  &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
983  if (IsLoadCSE)
984  combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
985 
986  return replaceInstUsesWith(
987  LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
988  LI.getName() + ".cast"));
989  }
990 
991  // None of the following transforms are legal for volatile/ordered atomic
992  // loads. Most of them do apply for unordered atomics.
993  if (!LI.isUnordered()) return nullptr;
994 
995  // load(gep null, ...) -> unreachable
996  // load null/undef -> unreachable
997  // TODO: Consider a target hook for valid address spaces for this xforms.
998  if (canSimplifyNullLoadOrGEP(LI, Op)) {
999  // Insert a new store to null instruction before the load to indicate
1000  // that this code is not reachable. We do this instead of inserting
1001  // an unreachable instruction directly because we cannot modify the
1002  // CFG.
1003  StoreInst *SI = new StoreInst(UndefValue::get(LI.getType()),
1004  Constant::getNullValue(Op->getType()), &LI);
1005  SI->setDebugLoc(LI.getDebugLoc());
1006  return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
1007  }
1008 
1009  if (Op->hasOneUse()) {
1010  // Change select and PHI nodes to select values instead of addresses: this
1011  // helps alias analysis out a lot, allows many other simplifications, and
1012  // exposes redundancy in the code.
1013  //
1014  // Note that we cannot do the transformation unless we know that the
1015  // introduced loads cannot trap! Something like this is valid as long as
1016  // the condition is always false: load (select bool %C, int* null, int* %G),
1017  // but it would not be valid if we transformed it to load from null
1018  // unconditionally.
1019  //
1020  if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1021  // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1022  unsigned Align = LI.getAlignment();
1023  if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), Align,
1024  DL, SI) &&
1025  isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), Align,
1026  DL, SI)) {
1027  LoadInst *V1 =
1028  Builder.CreateLoad(LI.getType(), SI->getOperand(1),
1029  SI->getOperand(1)->getName() + ".val");
1030  LoadInst *V2 =
1031  Builder.CreateLoad(LI.getType(), SI->getOperand(2),
1032  SI->getOperand(2)->getName() + ".val");
1033  assert(LI.isUnordered() && "implied by above");
1034  V1->setAlignment(Align);
1035  V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1036  V2->setAlignment(Align);
1037  V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1038  return SelectInst::Create(SI->getCondition(), V1, V2);
1039  }
1040 
1041  // load (select (cond, null, P)) -> load P
1042  if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1043  !NullPointerIsDefined(SI->getFunction(),
1044  LI.getPointerAddressSpace())) {
1045  LI.setOperand(0, SI->getOperand(2));
1046  return &LI;
1047  }
1048 
1049  // load (select (cond, P, null)) -> load P
1050  if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1051  !NullPointerIsDefined(SI->getFunction(),
1052  LI.getPointerAddressSpace())) {
1053  LI.setOperand(0, SI->getOperand(1));
1054  return &LI;
1055  }
1056  }
1057  }
1058  return nullptr;
1059 }
1060 
1061 /// Look for extractelement/insertvalue sequence that acts like a bitcast.
1062 ///
1063 /// \returns underlying value that was "cast", or nullptr otherwise.
1064 ///
1065 /// For example, if we have:
1066 ///
1067 /// %E0 = extractelement <2 x double> %U, i32 0
1068 /// %V0 = insertvalue [2 x double] undef, double %E0, 0
1069 /// %E1 = extractelement <2 x double> %U, i32 1
1070 /// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1071 ///
1072 /// and the layout of a <2 x double> is isomorphic to a [2 x double],
1073 /// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1074 /// Note that %U may contain non-undef values where %V1 has undef.
1075 static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
1076  Value *U = nullptr;
1077  while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1078  auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1079  if (!E)
1080  return nullptr;
1081  auto *W = E->getVectorOperand();
1082  if (!U)
1083  U = W;
1084  else if (U != W)
1085  return nullptr;
1086  auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1087  if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1088  return nullptr;
1089  V = IV->getAggregateOperand();
1090  }
1091  if (!isa<UndefValue>(V) ||!U)
1092  return nullptr;
1093 
1094  auto *UT = cast<VectorType>(U->getType());
1095  auto *VT = V->getType();
1096  // Check that types UT and VT are bitwise isomorphic.
1097  const auto &DL = IC.getDataLayout();
1098  if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1099  return nullptr;
1100  }
1101  if (auto *AT = dyn_cast<ArrayType>(VT)) {
1102  if (AT->getNumElements() != UT->getNumElements())
1103  return nullptr;
1104  } else {
1105  auto *ST = cast<StructType>(VT);
1106  if (ST->getNumElements() != UT->getNumElements())
1107  return nullptr;
1108  for (const auto *EltT : ST->elements()) {
1109  if (EltT != UT->getElementType())
1110  return nullptr;
1111  }
1112  }
1113  return U;
1114 }
1115 
1116 /// Combine stores to match the type of value being stored.
1117 ///
1118 /// The core idea here is that the memory does not have any intrinsic type and
1119 /// where we can we should match the type of a store to the type of value being
1120 /// stored.
1121 ///
1122 /// However, this routine must never change the width of a store or the number of
1123 /// stores as that would introduce a semantic change. This combine is expected to
1124 /// be a semantic no-op which just allows stores to more closely model the types
1125 /// of their incoming values.
1126 ///
1127 /// Currently, we also refuse to change the precise type used for an atomic or
1128 /// volatile store. This is debatable, and might be reasonable to change later.
1129 /// However, it is risky in case some backend or other part of LLVM is relying
1130 /// on the exact type stored to select appropriate atomic operations.
1131 ///
1132 /// \returns true if the store was successfully combined away. This indicates
1133 /// the caller must erase the store instruction. We have to let the caller erase
1134 /// the store instruction as otherwise there is no way to signal whether it was
1135 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
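/// As a hypothetical example, a store of a bitcast value:
///
///   %i = bitcast float %f to i32
///   store i32 %i, i32* %p
///
/// is rewritten to store the original value at its own type:
///
///   %p.cast = bitcast i32* %p to float*
///   store float %f, float* %p.cast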
1136 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
1137  // FIXME: We could probably with some care handle both volatile and ordered
1138  // atomic stores here but it isn't clear that this is important.
1139  if (!SI.isUnordered())
1140  return false;
1141 
1142  // swifterror values can't be bitcasted.
1143  if (SI.getPointerOperand()->isSwiftError())
1144  return false;
1145 
1146  Value *V = SI.getValueOperand();
1147 
1148  // Fold away bit casts of the stored value by storing the original type.
1149  if (auto *BC = dyn_cast<BitCastInst>(V)) {
1150  V = BC->getOperand(0);
1151  if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1152  combineStoreToNewValue(IC, SI, V);
1153  return true;
1154  }
1155  }
1156 
1157  if (Value *U = likeBitCastFromVector(IC, V))
1158  if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1159  combineStoreToNewValue(IC, SI, U);
1160  return true;
1161  }
1162 
1163  // FIXME: We should also canonicalize stores of vectors when their elements
1164  // are cast to other types.
1165  return false;
1166 }
1167 
1168 static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
1169  // FIXME: We could probably with some care handle both volatile and atomic
1170  // stores here but it isn't clear that this is important.
1171  if (!SI.isSimple())
1172  return false;
1173 
1174  Value *V = SI.getValueOperand();
1175  Type *T = V->getType();
1176 
1177  if (!T->isAggregateType())
1178  return false;
1179 
1180  if (auto *ST = dyn_cast<StructType>(T)) {
1181  // If the struct only has one element, we unpack it.
1182  unsigned Count = ST->getNumElements();
1183  if (Count == 1) {
1184  V = IC.Builder.CreateExtractValue(V, 0);
1185  combineStoreToNewValue(IC, SI, V);
1186  return true;
1187  }
1188 
1189  // We don't want to break stores with padding here as we'd lose
1190  // the knowledge that padding exists for the rest of the pipeline.
1191  const DataLayout &DL = IC.getDataLayout();
1192  auto *SL = DL.getStructLayout(ST);
1193  if (SL->hasPadding())
1194  return false;
1195 
1196  auto Align = SI.getAlignment();
1197  if (!Align)
1198  Align = DL.getABITypeAlignment(ST);
1199 
1200  SmallString<16> EltName = V->getName();
1201  EltName += ".elt";
1202  auto *Addr = SI.getPointerOperand();
1203  SmallString<16> AddrName = Addr->getName();
1204  AddrName += ".repack";
1205 
1206  auto *IdxType = Type::getInt32Ty(ST->getContext());
1207  auto *Zero = ConstantInt::get(IdxType, 0);
1208  for (unsigned i = 0; i < Count; i++) {
1209  Value *Indices[2] = {
1210  Zero,
1211  ConstantInt::get(IdxType, i),
1212  };
1213  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
1214  AddrName);
1215  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1216  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
1217  llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1218  AAMDNodes AAMD;
1219  SI.getAAMetadata(AAMD);
1220  NS->setAAMetadata(AAMD);
1221  }
1222 
1223  return true;
1224  }
1225 
1226  if (auto *AT = dyn_cast<ArrayType>(T)) {
1227  // If the array only has one element, we unpack it.
1228  auto NumElements = AT->getNumElements();
1229  if (NumElements == 1) {
1230  V = IC.Builder.CreateExtractValue(V, 0);
1231  combineStoreToNewValue(IC, SI, V);
1232  return true;
1233  }
1234 
1235  // Bail out if the array is too large. Ideally we would like to optimize
1236  // arrays of arbitrary size but this has a terrible impact on compile time.
1237  // The threshold here is chosen arbitrarily, maybe needs a little bit of
1238  // tuning.
1239  if (NumElements > IC.MaxArraySizeForCombine)
1240  return false;
1241 
1242  const DataLayout &DL = IC.getDataLayout();
1243  auto EltSize = DL.getTypeAllocSize(AT->getElementType());
1244  auto Align = SI.getAlignment();
1245  if (!Align)
1246  Align = DL.getABITypeAlignment(T);
1247 
1248  SmallString<16> EltName = V->getName();
1249  EltName += ".elt";
1250  auto *Addr = SI.getPointerOperand();
1251  SmallString<16> AddrName = Addr->getName();
1252  AddrName += ".repack";
1253 
1254  auto *IdxType = Type::getInt64Ty(T->getContext());
1255  auto *Zero = ConstantInt::get(IdxType, 0);
1256 
1257  uint64_t Offset = 0;
1258  for (uint64_t i = 0; i < NumElements; i++) {
1259  Value *Indices[2] = {
1260  Zero,
1261  ConstantInt::get(IdxType, i),
1262  };
1263  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
1264  AddrName);
1265  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1266  auto EltAlign = MinAlign(Align, Offset);
1267  Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1268  AAMDNodes AAMD;
1269  SI.getAAMetadata(AAMD);
1270  NS->setAAMetadata(AAMD);
1271  Offset += EltSize;
1272  }
1273 
1274  return true;
1275  }
1276 
1277  return false;
1278 }
1279 
1280 /// equivalentAddressValues - Test if A and B will obviously have the same
1281 /// value. This includes recognizing that %t0 and %t1 will have the same
1282 /// value in code like this:
1283 /// %t0 = getelementptr \@a, 0, 3
1284 /// store i32 0, i32* %t0
1285 /// %t1 = getelementptr \@a, 0, 3
1286 /// %t2 = load i32* %t1
1287 ///
1288 static bool equivalentAddressValues(Value *A, Value *B) {
1289  // Test if the values are trivially equivalent.
1290  if (A == B) return true;
1291 
1292  // Test if the values come from identical arithmetic instructions.
1293  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1294  // it's only used to compare two uses within the same basic block, which
1295  // means that they'll always either have the same value or one of them
1296  // will have an undefined value.
1297  if (isa<BinaryOperator>(A) ||
1298  isa<CastInst>(A) ||
1299  isa<PHINode>(A) ||
1300  isa<GetElementPtrInst>(A))
1301  if (Instruction *BI = dyn_cast<Instruction>(B))
1302  if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1303  return true;
1304 
1305  // Otherwise they may not be equivalent.
1306  return false;
1307 }
1308 
1309 /// Converts store (bitcast (load (bitcast (select ...)))) to
1310 /// store (load (select ...)), where select is minmax:
1311 /// select ((cmp load V1, load V2), V1, V2).
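/// A hypothetical instance, where %min.addr is such a minmax select over two
/// float* addresses:
///
///   %src = bitcast float* %min.addr to i32*
///   %v = load i32, i32* %src
///   %dst = bitcast float* %out to i32*
///   store i32 %v, i32* %dst
///
/// which this turns into a plain float load and store through the original
/// pointers, dropping both bitcasts.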
1312 static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
1313                                                 StoreInst &SI) {
1314  // bitcast?
1315  if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
1316  return false;
1317  // load? integer?
1318  Value *LoadAddr;
1319  if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
1320  return false;
1321  auto *LI = cast<LoadInst>(SI.getValueOperand());
1322  if (!LI->getType()->isIntegerTy())
1323  return false;
1324  if (!isMinMaxWithLoads(LoadAddr))
1325  return false;
1326 
1327  if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
1328  auto *SI = dyn_cast<StoreInst>(U);
1329  return SI && SI->getPointerOperand() != LI &&
1330  peekThroughBitcast(SI->getPointerOperand()) != LoadAddr &&
1331  !SI->getPointerOperand()->isSwiftError();
1332  }))
1333  return false;
1334 
1335  IC.Builder.SetInsertPoint(LI);
1336  LoadInst *NewLI = combineLoadToNewType(
1337  IC, *LI, LoadAddr->getType()->getPointerElementType());
1338  // Replace all the stores with stores of the newly loaded value.
1339  for (auto *UI : LI->users()) {
1340  auto *USI = cast<StoreInst>(UI);
1341  IC.Builder.SetInsertPoint(USI);
1342  combineStoreToNewValue(IC, *USI, NewLI);
1343  }
1344  IC.replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
1345  IC.eraseInstFromFunction(*LI);
1346  return true;
1347 }
1348 
1349 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
1350  Value *Val = SI.getOperand(0);
1351  Value *Ptr = SI.getOperand(1);
1352 
1353  // Try to canonicalize the stored type.
1354  if (combineStoreToValueType(*this, SI))
1355  return eraseInstFromFunction(SI);
1356 
1357  // Attempt to improve the alignment.
1358  unsigned KnownAlign = getOrEnforceKnownAlignment(
1359  Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT);
1360  unsigned StoreAlign = SI.getAlignment();
1361  unsigned EffectiveStoreAlign =
1362  StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
1363 
1364  if (KnownAlign > EffectiveStoreAlign)
1365  SI.setAlignment(KnownAlign);
1366  else if (StoreAlign == 0)
1367  SI.setAlignment(EffectiveStoreAlign);
1368 
1369  // Try to canonicalize the stored type.
1370  if (unpackStoreToAggregate(*this, SI))
1371  return eraseInstFromFunction(SI);
1372 
1373  if (removeBitcastsFromLoadStoreOnMinMax(*this, SI))
1374  return eraseInstFromFunction(SI);
1375 
1376  // Replace GEP indices if possible.
1377  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
1378  Worklist.Add(NewGEPI);
1379  return &SI;
1380  }
1381 
1382  // Don't hack volatile/ordered stores.
1383  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1384  if (!SI.isUnordered()) return nullptr;
1385 
1386  // If the RHS is an alloca with a single use, zapify the store, making the
1387  // alloca dead.
1388  if (Ptr->hasOneUse()) {
1389  if (isa<AllocaInst>(Ptr))
1390  return eraseInstFromFunction(SI);
1391  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1392  if (isa<AllocaInst>(GEP->getOperand(0))) {
1393  if (GEP->getOperand(0)->hasOneUse())
1394  return eraseInstFromFunction(SI);
1395  }
1396  }
1397  }
1398 
1399  // If we have a store to a location which is known constant, we can conclude
1400  // that the store must be storing the constant value (else the memory
1401  // wouldn't be constant), and this must be a noop.
1402  if (AA->pointsToConstantMemory(Ptr))
1403  return eraseInstFromFunction(SI);
1404 
1405  // Do really simple DSE, to catch cases where there are several consecutive
1406  // stores to the same location, separated by a few arithmetic operations. This
1407  // situation often occurs with bitfield accesses.
1408  BasicBlock::iterator BBI(SI);
1409  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1410  --ScanInsts) {
1411  --BBI;
1412  // Don't count debug info directives, lest they affect codegen,
1413  // and we skip pointer-to-pointer bitcasts, which are NOPs.
1414  if (isa<DbgInfoIntrinsic>(BBI) ||
1415  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1416  ScanInsts++;
1417  continue;
1418  }
1419 
1420  if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1421  // Prev store isn't volatile, and stores to the same location?
1422  if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
1423  SI.getOperand(1))) {
1424  ++NumDeadStore;
1425  ++BBI;
1426  eraseInstFromFunction(*PrevSI);
1427  continue;
1428  }
1429  break;
1430  }
1431 
1432  // If this is a load, we have to stop. However, if the loaded value is from
1433  // the pointer we're loading and is producing the pointer we're storing,
1434  // then *this* store is dead (X = load P; store X -> P).
1435  if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1436  if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1437  assert(SI.isUnordered() && "can't eliminate ordering operation");
1438  return eraseInstFromFunction(SI);
1439  }
1440 
1441  // Otherwise, this is a load from some other location. Stores before it
1442  // may not be dead.
1443  break;
1444  }
1445 
1446  // Don't skip over loads, throws or things that can modify memory.
1447  if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1448  break;
1449  }
1450 
1451  // store X, null -> turns into 'unreachable' in SimplifyCFG
1452  // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1453  if (canSimplifyNullStoreOrGEP(SI)) {
1454  if (!isa<UndefValue>(Val)) {
1455  SI.setOperand(0, UndefValue::get(Val->getType()));
1456  if (Instruction *U = dyn_cast<Instruction>(Val))
1457  Worklist.Add(U); // Dropped a use.
1458  }
1459  return nullptr; // Do not modify these!
1460  }
1461 
1462  // store undef, Ptr -> noop
1463  if (isa<UndefValue>(Val))
1464  return eraseInstFromFunction(SI);
1465 
1466  // If this store is the second-to-last instruction in the basic block
1467  // (excluding debug info and bitcasts of pointers) and if the block ends with
1468  // an unconditional branch, try to move the store to the successor block.
1469  BBI = SI.getIterator();
1470  do {
1471  ++BBI;
1472  } while (isa<DbgInfoIntrinsic>(BBI) ||
1473  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
1474 
1475  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
1476  if (BI->isUnconditional())
1477  mergeStoreIntoSuccessor(SI);
1478 
1479  return nullptr;
1480 }
1481 
1482 /// Try to transform:
1483 /// if () { *P = v1; } else { *P = v2 }
1484 /// or:
1485 /// *P = v1; if () { *P = v2; }
1486 /// into a phi node with a store in the successor.
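/// As a hypothetical IR sketch, the diamond
///
///   then:                          else:
///     store i32 1, i32* %p           store i32 2, i32* %p
///     br label %merge                br label %merge
///
/// becomes, in %merge:
///
///   %storemerge = phi i32 [ 1, %then ], [ 2, %else ]
///   store i32 %storemerge, i32* %p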
1487 bool InstCombiner::mergeStoreIntoSuccessor(StoreInst &SI) {
1488  assert(SI.isUnordered() &&
1489  "This code has not been audited for volatile or ordered store case.");
1490 
1491  // Check if the successor block has exactly 2 incoming edges.
1492  BasicBlock *StoreBB = SI.getParent();
1493  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1494  if (!DestBB->hasNPredecessors(2))
1495  return false;
1496 
1497  // Capture the other block (the block that doesn't contain our store).
1498  pred_iterator PredIter = pred_begin(DestBB);
1499  if (*PredIter == StoreBB)
1500  ++PredIter;
1501  BasicBlock *OtherBB = *PredIter;
1502 
1503  // Bail out if the relevant blocks aren't all distinct. This can happen,
1504  // for example, if SI is in an infinite loop.
1505  if (StoreBB == DestBB || OtherBB == DestBB)
1506  return false;
1507 
1508  // Verify that the other block ends in a branch and is not otherwise empty.
1509  BasicBlock::iterator BBI(OtherBB->getTerminator());
1510  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1511  if (!OtherBr || BBI == OtherBB->begin())
1512  return false;
1513 
1514  // If the other block ends in an unconditional branch, check for the 'if then
1515  // else' case. There is an instruction before the branch.
1516  StoreInst *OtherStore = nullptr;
1517  if (OtherBr->isUnconditional()) {
1518  --BBI;
1519  // Skip over debugging info.
1520  while (isa<DbgInfoIntrinsic>(BBI) ||
1521  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1522  if (BBI==OtherBB->begin())
1523  return false;
1524  --BBI;
1525  }
1526  // If this isn't a store, isn't a store to the same location, or is not the
1527  // right kind of store, bail out.
1528  OtherStore = dyn_cast<StoreInst>(BBI);
1529  if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
1530  !SI.isSameOperationAs(OtherStore))
1531  return false;
1532  } else {
1533  // Otherwise, the other block ended with a conditional branch. If one of the
1534  // destinations is StoreBB, then we have the if/then case.
1535  if (OtherBr->getSuccessor(0) != StoreBB &&
1536  OtherBr->getSuccessor(1) != StoreBB)
1537  return false;
1538 
1539  // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1540  // if/then triangle. See if there is a store to the same ptr as SI that
1541  // lives in OtherBB.
1542  for (;; --BBI) {
1543  // Check to see if we find the matching store.
1544  if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
1545  if (OtherStore->getOperand(1) != SI.getOperand(1) ||
1546  !SI.isSameOperationAs(OtherStore))
1547  return false;
1548  break;
1549  }
1550  // If we find something that may be using or overwriting the stored
1551  // value, or if we run out of instructions, we can't do the transform.
1552  if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1553  BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1554  return false;
1555  }
1556 
1557  // In order to eliminate the store in OtherBr, we have to make sure nothing
1558  // reads or overwrites the stored value in StoreBB.
1559  for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1560  // FIXME: This should really be AA driven.
1561  if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1562  return false;
1563  }
1564  }
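  // Note: these scans are what reject, e.g., a hypothetical triangle in which
  // StoreBB reads the pointer before SI:
  //   storebb:                   ; reached from otherbb, which stored to %p
  //     %x = load i32, i32* %p   ; observes OtherStore's value, so OtherStore
  //     store i32 1, i32* %p     ; cannot be sunk past it into DestBB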
1565 
1566  // Insert a PHI node now if we need it.
1567  Value *MergedVal = OtherStore->getOperand(0);
1568  // The debug locations of the original instructions might differ. Merge them.
1569  DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
1570                                                     OtherStore->getDebugLoc());
1571  if (MergedVal != SI.getOperand(0)) {
1572  PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
1573  PN->addIncoming(SI.getOperand(0), SI.getParent());
1574  PN->addIncoming(OtherStore->getOperand(0), OtherBB);
1575  MergedVal = InsertNewInstBefore(PN, DestBB->front());
1576  PN->setDebugLoc(MergedLoc);
1577  }
1578 
1579  // Advance to a place where it is safe to insert the new store and insert it.
1580  BBI = DestBB->getFirstInsertionPt();
1581  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
1582  SI.isVolatile(), SI.getAlignment(),
1583  SI.getOrdering(), SI.getSyncScopeID());
1584  InsertNewInstBefore(NewSI, *BBI);
1585  NewSI->setDebugLoc(MergedLoc);
1586 
1587  // If the two stores had AA tags, merge them.
1588  AAMDNodes AATags;
1589  SI.getAAMetadata(AATags);
1590  if (AATags) {
1591  OtherStore->getAAMetadata(AATags, /* Merge = */ true);
1592  NewSI->setAAMetadata(AATags);
1593  }
1594 
1595  // Nuke the old stores.
1596  eraseInstFromFunction(SI);
1597  eraseInstFromFunction(*OtherStore);
1598  return true;
1599 }