InstCombineLoadStoreAlloca.cpp (LLVM 10.0.0svn)
1 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the visit functions for load, store and alloca.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "InstCombineInternal.h"
14 #include "llvm/ADT/MapVector.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/Loads.h"
19 #include "llvm/IR/ConstantRange.h"
20 #include "llvm/IR/DataLayout.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/LLVMContext.h"
24 #include "llvm/IR/MDBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
27 using namespace llvm;
28 using namespace PatternMatch;
29 
30 #define DEBUG_TYPE "instcombine"
31 
32 STATISTIC(NumDeadStore, "Number of dead stores eliminated");
33 STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
34 
35 /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
36 /// some part of a constant global variable. This intentionally only accepts
37 /// constant expressions because we can't rewrite arbitrary instructions.
38 static bool pointsToConstantGlobal(Value *V) {
39  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
40  return GV->isConstant();
41 
42  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
43  if (CE->getOpcode() == Instruction::BitCast ||
44  CE->getOpcode() == Instruction::AddrSpaceCast ||
45  CE->getOpcode() == Instruction::GetElementPtr)
46  return pointsToConstantGlobal(CE->getOperand(0));
47  }
48  return false;
49 }
50 
51 /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
52 /// pointer to an alloca. Ignore any reads of the pointer, return false if we
53 /// see any stores or other unknown uses. If we see pointer arithmetic, keep
54 /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
55 /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
56 /// the alloca, and if the source pointer is a pointer to a constant global, we
57 /// can optimize this.
58 static bool
59 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
60  SmallVectorImpl<Instruction *> &ToDelete) {
61  // We track lifetime intrinsics as we encounter them. If we decide to go
62  // ahead and replace the value with the global, this lets the caller quickly
63  // eliminate the markers.
64 
65  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
66  ValuesToInspect.emplace_back(V, false);
67  while (!ValuesToInspect.empty()) {
68  auto ValuePair = ValuesToInspect.pop_back_val();
69  const bool IsOffset = ValuePair.second;
70  for (auto &U : ValuePair.first->uses()) {
71  auto *I = cast<Instruction>(U.getUser());
72 
73  if (auto *LI = dyn_cast<LoadInst>(I)) {
74  // Ignore non-volatile loads, they are always ok.
75  if (!LI->isSimple()) return false;
76  continue;
77  }
78 
79  if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
80  // If uses of the bitcast are ok, we are ok.
81  ValuesToInspect.emplace_back(I, IsOffset);
82  continue;
83  }
84  if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
85  // If the GEP has all zero indices, it doesn't offset the pointer. If it
86  // doesn't, it does.
87  ValuesToInspect.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
88  continue;
89  }
90 
91  if (auto *Call = dyn_cast<CallBase>(I)) {
92  // If this is the function being called then we treat it like a load and
93  // ignore it.
94  if (Call->isCallee(&U))
95  continue;
96 
97  unsigned DataOpNo = Call->getDataOperandNo(&U);
98  bool IsArgOperand = Call->isArgOperand(&U);
99 
100  // Inalloca arguments are clobbered by the call.
101  if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
102  return false;
103 
104  // If this is a readonly/readnone call site, then we know it is just a
105  // load (but one that potentially returns the value itself), so we can
106  // ignore it if we know that the value isn't captured.
107  if (Call->onlyReadsMemory() &&
108  (Call->use_empty() || Call->doesNotCapture(DataOpNo)))
109  continue;
110 
111  // If this is being passed as a byval argument, the caller is making a
112  // copy, so it is only a read of the alloca.
113  if (IsArgOperand && Call->isByValArgument(DataOpNo))
114  continue;
115  }
116 
117  // Lifetime intrinsics can be handled by the caller.
118  if (I->isLifetimeStartOrEnd()) {
119  assert(I->use_empty() && "Lifetime markers have no result to use!");
120  ToDelete.push_back(I);
121  continue;
122  }
123 
124  // If this isn't our memcpy/memmove, reject it as something we can't
125  // handle.
126  MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
127  if (!MI)
128  return false;
129 
130  // If the transfer is using the alloca as a source of the transfer, then
131  // ignore it since it is a load (unless the transfer is volatile).
132  if (U.getOperandNo() == 1) {
133  if (MI->isVolatile()) return false;
134  continue;
135  }
136 
137  // If we already have seen a copy, reject the second one.
138  if (TheCopy) return false;
139 
140  // If the pointer has been offset from the start of the alloca, we can't
141  // safely handle this.
142  if (IsOffset) return false;
143 
144  // If the memintrinsic isn't using the alloca as the dest, reject it.
145  if (U.getOperandNo() != 0) return false;
146 
147  // If the source of the memcpy/move is not a constant global, reject it.
148  if (!pointsToConstantGlobal(MI->getSource()))
149  return false;
150 
151  // Otherwise, the transform is safe. Remember the copy instruction.
152  TheCopy = MI;
153  }
154  }
155  return true;
156 }
157 
158 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
159 /// modified by a copy from a constant global. If we can prove this, we can
160 /// replace any uses of the alloca with uses of the global directly.
161 static MemTransferInst *
162 isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
163  SmallVectorImpl<Instruction *> &ToDelete) {
164  MemTransferInst *TheCopy = nullptr;
165  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
166  return TheCopy;
167  return nullptr;
168 }
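// Illustrative sketch of the pattern the two helpers above detect (made-up
// IR, not taken from the LLVM sources or tests):
//   @G = private unnamed_addr constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
//   %A = alloca [4 x i32]
//   %p = bitcast [4 x i32]* %A to i8*
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* bitcast ([4 x i32]* @G to i8*), i64 16, i1 false)
//   ; ...all remaining uses of %A are reads...
// Since %A is only ever written by that single copy from a constant global,
// visitAllocaInst below can rewrite every use of %A to use @G and drop the
// memcpy entirely.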
169 
170 /// Returns true if V is dereferenceable for size of alloca.
171 static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
172  const DataLayout &DL) {
173  if (AI->isArrayAllocation())
174  return false;
175  uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
176  if (!AllocaSize)
177  return false;
178  return isDereferenceableAndAlignedPointer(V, AI->getAlignment(),
179  APInt(64, AllocaSize), DL);
180 }
181 
182 static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
183  // Check for array size of 1 (scalar allocation).
184  if (!AI.isArrayAllocation()) {
185  // i32 1 is the canonical array size for scalar allocations.
186  if (AI.getArraySize()->getType()->isIntegerTy(32))
187  return nullptr;
188 
189  // Canonicalize it.
190  Value *V = IC.Builder.getInt32(1);
191  AI.setOperand(0, V);
192  return &AI;
193  }
194 
195  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
196  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
197  if (C->getValue().getActiveBits() <= 64) {
198  Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
199  AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
200  New->setAlignment(AI.getAlignment());
201 
202  // Scan to the end of the allocation instructions, to skip over a block of
203  // allocas if possible...also skip interleaved debug info
204  //
205  BasicBlock::iterator It(New);
206  while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
207  ++It;
208 
209  // Now that It is pointing to the first non-allocation-inst in the block,
210  // insert our getelementptr instruction...
211  //
212  Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
213  Value *NullIdx = Constant::getNullValue(IdxTy);
214  Value *Idx[2] = {NullIdx, NullIdx};
215  Instruction *GEP = GetElementPtrInst::CreateInBounds(
216  NewTy, New, Idx, New->getName() + ".sub");
217  IC.InsertNewInstBefore(GEP, *It);
218 
219  // Now make everything use the getelementptr instead of the original
220  // allocation.
221  return IC.replaceInstUsesWith(AI, GEP);
222  }
223  }
224 
225  if (isa<UndefValue>(AI.getArraySize()))
226  return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
227 
228  // Ensure that the alloca array size argument has type intptr_t, so that
229  // any casting is exposed early.
230  Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
231  if (AI.getArraySize()->getType() != IntPtrTy) {
232  Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
233  AI.setOperand(0, V);
234  return &AI;
235  }
236 
237  return nullptr;
238 }
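// Illustrative sketch of the array-size canonicalization above (made-up IR,
// assuming 64-bit pointers):
//   %buf = alloca i32, i32 4
// becomes
//   %buf1 = alloca [4 x i32]
//   %buf1.sub = getelementptr inbounds [4 x i32], [4 x i32]* %buf1, i64 0, i64 0
// and all uses of the original i32* %buf are redirected to %buf1.sub.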
239 
240 namespace {
241 // If I and V are pointers in different address spaces, it is not allowed to
242 // use replaceAllUsesWith since I and V have different types. A
243 // non-target-specific transformation should not use addrspacecast on V since
244 // the two address spaces may be disjoint depending on the target.
245 //
246 // This class chases down uses of the old pointer until reaching the load
247 // instructions, then replaces the old pointer in the load instructions with
248 // the new pointer. If during the chasing it sees bitcast or GEP, it will
249 // create new bitcast or GEP with the new pointer and use them in the load
250 // instruction.
251 class PointerReplacer {
252 public:
253  PointerReplacer(InstCombiner &IC) : IC(IC) {}
254  void replacePointer(Instruction &I, Value *V);
255 
256 private:
257  void findLoadAndReplace(Instruction &I);
258  void replace(Instruction *I);
259  Value *getReplacement(Value *I);
260 
261  SmallVector<Instruction *, 4> Path;
262  MapVector<Value *, Value *> WorkMap;
263  InstCombiner &IC;
264 };
265 } // end anonymous namespace
266 
267 void PointerReplacer::findLoadAndReplace(Instruction &I) {
268  for (auto U : I.users()) {
269  auto *Inst = dyn_cast<Instruction>(&*U);
270  if (!Inst)
271  return;
272  LLVM_DEBUG(dbgs() << "Found pointer user: " << *U << '\n');
273  if (isa<LoadInst>(Inst)) {
274  for (auto P : Path)
275  replace(P);
276  replace(Inst);
277  } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
278  Path.push_back(Inst);
279  findLoadAndReplace(*Inst);
280  Path.pop_back();
281  } else {
282  return;
283  }
284  }
285 }
286 
287 Value *PointerReplacer::getReplacement(Value *V) {
288  auto Loc = WorkMap.find(V);
289  if (Loc != WorkMap.end())
290  return Loc->second;
291  return nullptr;
292 }
293 
294 void PointerReplacer::replace(Instruction *I) {
295  if (getReplacement(I))
296  return;
297 
298  if (auto *LT = dyn_cast<LoadInst>(I)) {
299  auto *V = getReplacement(LT->getPointerOperand());
300  assert(V && "Operand not replaced");
301  auto *NewI = new LoadInst(I->getType(), V);
302  NewI->takeName(LT);
303  IC.InsertNewInstWith(NewI, *LT);
304  IC.replaceInstUsesWith(*LT, NewI);
305  WorkMap[LT] = NewI;
306  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
307  auto *V = getReplacement(GEP->getPointerOperand());
308  assert(V && "Operand not replaced");
309  SmallVector<Value *, 8> Indices;
310  Indices.append(GEP->idx_begin(), GEP->idx_end());
311  auto *NewI = GetElementPtrInst::Create(
312  V->getType()->getPointerElementType(), V, Indices);
313  IC.InsertNewInstWith(NewI, *GEP);
314  NewI->takeName(GEP);
315  WorkMap[GEP] = NewI;
316  } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
317  auto *V = getReplacement(BC->getOperand(0));
318  assert(V && "Operand not replaced");
319  auto *NewT = PointerType::get(BC->getType()->getPointerElementType(),
320  V->getType()->getPointerAddressSpace());
321  auto *NewI = new BitCastInst(V, NewT);
322  IC.InsertNewInstWith(NewI, *BC);
323  NewI->takeName(BC);
324  WorkMap[BC] = NewI;
325  } else {
326  llvm_unreachable("should never reach here");
327  }
328 }
329 
330 void PointerReplacer::replacePointer(Instruction &I, Value *V) {
331 #ifndef NDEBUG
332  auto *PT = cast<PointerType>(I.getType());
333  auto *NT = cast<PointerType>(V->getType());
334  assert(PT != NT && PT->getElementType() == NT->getElementType() &&
335  "Invalid usage");
336 #endif
337  WorkMap[&I] = V;
338  findLoadAndReplace(I);
339 }
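// Illustrative sketch of what PointerReplacer does (made-up IR): if %A is an
// alloca in addrspace(0) that is only initialized by a copy from a constant
// global @G in another address space, a use chain such as
//   %c = bitcast [4 x i32]* %A to i8*
//   %g = getelementptr i8, i8* %c, i64 8
//   %v = load i8, i8* %g
// is cloned onto @G, so the loads read from @G directly and no addrspacecast
// is ever introduced.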
340 
341 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
342  if (auto *I = simplifyAllocaArraySize(*this, AI))
343  return I;
344 
345  if (AI.getAllocatedType()->isSized()) {
346  // If the alignment is 0 (unspecified), assign it the preferred alignment.
347  if (AI.getAlignment() == 0)
348  AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
349 
350  // Move all allocas of zero byte objects to the entry block and merge them
351  // together. Note that we only do this for allocas, because malloc should
352  // allocate and return a unique pointer, even for a zero byte allocation.
353  if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
354  // For a zero sized alloca there is no point in doing an array allocation.
355  // This is helpful if the array size is a complicated expression not used
356  // elsewhere.
357  if (AI.isArrayAllocation()) {
358  AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
359  return &AI;
360  }
361 
362  // Get the first instruction in the entry block.
363  BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
364  Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
365  if (FirstInst != &AI) {
366  // If the entry block doesn't start with a zero-size alloca then move
367  // this one to the start of the entry block. There is no problem with
368  // dominance as the array size was forced to a constant earlier already.
369  AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
370  if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
371  DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
372  AI.moveBefore(FirstInst);
373  return &AI;
374  }
375 
376  // If the alignment of the entry block alloca is 0 (unspecified),
377  // assign it the preferred alignment.
378  if (EntryAI->getAlignment() == 0)
379  EntryAI->setAlignment(
380  DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
381  // Replace this zero-sized alloca with the one at the start of the entry
382  // block after ensuring that the address will be aligned enough for both
383  // types.
384  unsigned MaxAlign = std::max(EntryAI->getAlignment(),
385  AI.getAlignment());
386  EntryAI->setAlignment(MaxAlign);
387  if (AI.getType() != EntryAI->getType())
388  return new BitCastInst(EntryAI, AI.getType());
389  return replaceInstUsesWith(AI, EntryAI);
390  }
391  }
392  }
393 
394  if (AI.getAlignment()) {
395  // Check to see if this allocation is only modified by a memcpy/memmove from
396  // a constant global whose alignment is equal to or exceeds that of the
397  // allocation. If this is the case, we can change all users to use
398  // the constant global instead. This is commonly produced by the CFE by
399  // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
400  // is only subsequently read.
401  SmallVector<Instruction *, 4> ToDelete;
402  if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
403  unsigned SourceAlign = getOrEnforceKnownAlignment(
404  Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
405  if (AI.getAlignment() <= SourceAlign &&
406  isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
407  LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
408  LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
409  for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
410  eraseInstFromFunction(*ToDelete[i]);
411  Constant *TheSrc = cast<Constant>(Copy->getSource());
412  auto *SrcTy = TheSrc->getType();
413  auto *DestTy = PointerType::get(AI.getType()->getPointerElementType(),
414  SrcTy->getPointerAddressSpace());
415  Constant *Cast =
416  ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, DestTy);
417  if (AI.getType()->getPointerAddressSpace() ==
418  SrcTy->getPointerAddressSpace()) {
419  Instruction *NewI = replaceInstUsesWith(AI, Cast);
420  eraseInstFromFunction(*Copy);
421  ++NumGlobalCopies;
422  return NewI;
423  } else {
424  PointerReplacer PtrReplacer(*this);
425  PtrReplacer.replacePointer(AI, Cast);
426  ++NumGlobalCopies;
427  }
428  }
429  }
430  }
431 
432  // At last, use the generic allocation site handler to aggressively remove
433  // unused allocas.
434  return visitAllocSite(AI);
435 }
436 
437 // Are we allowed to form an atomic load or store of this type?
438 static bool isSupportedAtomicType(Type *Ty) {
439  return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
440 }
441 
442 /// Helper to combine a load to a new type.
443 ///
444 /// This just does the work of combining a load to a new type. It handles
445 /// metadata, etc., and returns the new instruction. The \c NewTy should be the
446 /// loaded *value* type. This will convert it to a pointer, cast the operand to
447 /// that pointer type, load it, etc.
448 ///
449 /// Note that this will create all of the instructions with whatever insert
450 /// point the \c InstCombiner currently is using.
451 static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
452  const Twine &Suffix = "") {
453  assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
454  "can't fold an atomic load to requested type");
455 
456  Value *Ptr = LI.getPointerOperand();
457  unsigned AS = LI.getPointerAddressSpace();
458  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
459  LI.getAllMetadata(MD);
460 
461  Value *NewPtr = nullptr;
462  if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
463  NewPtr->getType()->getPointerElementType() == NewTy &&
464  NewPtr->getType()->getPointerAddressSpace() == AS))
465  NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
466 
467  LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
468  NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
469  NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
470  MDBuilder MDB(NewLoad->getContext());
471  for (const auto &MDPair : MD) {
472  unsigned ID = MDPair.first;
473  MDNode *N = MDPair.second;
474  // Note, essentially every kind of metadata should be preserved here! This
475  // routine is supposed to clone a load instruction changing *only its type*.
476  // The only metadata it makes sense to drop is metadata which is invalidated
477  // when the pointer type changes. This should essentially never be the case
478  // in LLVM, but we explicitly switch over only known metadata to be
479  // conservatively correct. If you are adding metadata to LLVM which pertains
480  // to loads, you almost certainly want to add it here.
481  switch (ID) {
482  case LLVMContext::MD_dbg:
483  case LLVMContext::MD_tbaa:
484  case LLVMContext::MD_prof:
485  case LLVMContext::MD_fpmath:
486  case LLVMContext::MD_tbaa_struct:
487  case LLVMContext::MD_invariant_load:
488  case LLVMContext::MD_alias_scope:
489  case LLVMContext::MD_noalias:
490  case LLVMContext::MD_nontemporal:
491  case LLVMContext::MD_mem_parallel_loop_access:
492  case LLVMContext::MD_access_group:
493  // All of these directly apply.
494  NewLoad->setMetadata(ID, N);
495  break;
496 
497  case LLVMContext::MD_nonnull:
498  copyNonnullMetadata(LI, N, *NewLoad);
499  break;
500  case LLVMContext::MD_align:
501  case LLVMContext::MD_dereferenceable:
502  case LLVMContext::MD_dereferenceable_or_null:
503  // These only directly apply if the new type is also a pointer.
504  if (NewTy->isPointerTy())
505  NewLoad->setMetadata(ID, N);
506  break;
507  case LLVMContext::MD_range:
508  copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
509  break;
510  }
511  }
512  return NewLoad;
513 }
514 
515 /// Combine a store to a new type.
516 ///
517 /// Returns the newly created store instruction.
518 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
519  assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
520  "can't fold an atomic store of requested type");
521 
522  Value *Ptr = SI.getPointerOperand();
523  unsigned AS = SI.getPointerAddressSpace();
524  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
525  SI.getAllMetadata(MD);
526 
527  StoreInst *NewStore = IC.Builder.CreateAlignedStore(
528  V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
529  SI.getAlignment(), SI.isVolatile());
530  NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
531  for (const auto &MDPair : MD) {
532  unsigned ID = MDPair.first;
533  MDNode *N = MDPair.second;
534  // Note, essentially every kind of metadata should be preserved here! This
535  // routine is supposed to clone a store instruction changing *only its
536  // type*. The only metadata it makes sense to drop is metadata which is
537  // invalidated when the pointer type changes. This should essentially
538  // never be the case in LLVM, but we explicitly switch over only known
539  // metadata to be conservatively correct. If you are adding metadata to
540  // LLVM which pertains to stores, you almost certainly want to add it
541  // here.
542  switch (ID) {
543  case LLVMContext::MD_dbg:
544  case LLVMContext::MD_tbaa:
545  case LLVMContext::MD_prof:
546  case LLVMContext::MD_fpmath:
547  case LLVMContext::MD_tbaa_struct:
548  case LLVMContext::MD_alias_scope:
549  case LLVMContext::MD_noalias:
550  case LLVMContext::MD_nontemporal:
551  case LLVMContext::MD_mem_parallel_loop_access:
552  case LLVMContext::MD_access_group:
553  // All of these directly apply.
554  NewStore->setMetadata(ID, N);
555  break;
556  case LLVMContext::MD_invariant_load:
557  case LLVMContext::MD_nonnull:
558  case LLVMContext::MD_range:
559  case LLVMContext::MD_align:
560  case LLVMContext::MD_dereferenceable:
561  case LLVMContext::MD_dereferenceable_or_null:
562  // These don't apply for stores.
563  break;
564  }
565  }
566 
567  return NewStore;
568 }
569 
570 /// Returns true if the instruction represents a minmax pattern like:
571 /// select ((cmp load V1, load V2), V1, V2).
572 static bool isMinMaxWithLoads(Value *V) {
573  assert(V->getType()->isPointerTy() && "Expected pointer type.");
574  // Ignore possible ty* to ixx* bitcast.
575  V = peekThroughBitcast(V);
576  // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
577  // pattern.
578  CmpInst::Predicate Pred;
579  Instruction *L1;
580  Instruction *L2;
581  Value *LHS;
582  Value *RHS;
583  if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
584  m_Value(LHS), m_Value(RHS))))
585  return false;
586  return (match(L1, m_Load(m_Specific(LHS))) &&
587  match(L2, m_Load(m_Specific(RHS)))) ||
588  (match(L1, m_Load(m_Specific(RHS))) &&
589  match(L2, m_Load(m_Specific(LHS))));
590 }
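// Illustrative sketch of an address accepted by isMinMaxWithLoads (made-up
// IR): the select picks one of two pointers by comparing the values loaded
// from them, i.e. a min/max computed over memory.
//   %v1 = load i32, i32* %p1
//   %v2 = load i32, i32* %p2
//   %cmp = icmp slt i32 %v1, %v2
//   %min.p = select i1 %cmp, i32* %p1, i32* %p2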
591 
592 /// Combine loads to match the type of their uses' value after looking
593 /// through intervening bitcasts.
594 ///
595 /// The core idea here is that if the result of a load is used in an operation,
596 /// we should load the type most conducive to that operation. For example, when
597 /// loading an integer and converting that immediately to a pointer, we should
598 /// instead directly load a pointer.
599 ///
600 /// However, this routine must never change the width of a load or the number of
601 /// loads as that would introduce a semantic change. This combine is expected to
602 /// be a semantic no-op which just allows loads to more closely model the types
603 /// of their consuming operations.
604 ///
605 /// Currently, we also refuse to change the precise type used for an atomic load
606 /// or a volatile load. This is debatable, and might be reasonable to change
607 /// later. However, it is risky in case some backend or other part of LLVM is
608 /// relying on the exact type loaded to select appropriate atomic operations.
609 static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
610  // FIXME: We could probably with some care handle both volatile and ordered
611  // atomic loads here but it isn't clear that this is important.
612  if (!LI.isUnordered())
613  return nullptr;
614 
615  if (LI.use_empty())
616  return nullptr;
617 
618  // swifterror values can't be bitcasted.
619  if (LI.getPointerOperand()->isSwiftError())
620  return nullptr;
621 
622  Type *Ty = LI.getType();
623  const DataLayout &DL = IC.getDataLayout();
624 
625  // Try to canonicalize loads which are only ever stored to operate over
626  // integers instead of any other type. We only do this when the loaded type
627  // is sized and has a size exactly the same as its store size and the store
628  // size is a legal integer type.
629  // Do not perform canonicalization if minmax pattern is found (to avoid
630  // infinite loop).
631  if (!Ty->isIntegerTy() && Ty->isSized() &&
632  DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
633  DL.typeSizeEqualsStoreSize(Ty) &&
634  !DL.isNonIntegralPointerType(Ty) &&
635  !isMinMaxWithLoads(
636  peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
637  if (all_of(LI.users(), [&LI](User *U) {
638  auto *SI = dyn_cast<StoreInst>(U);
639  return SI && SI->getPointerOperand() != &LI &&
640  !SI->getPointerOperand()->isSwiftError();
641  })) {
642  LoadInst *NewLoad = combineLoadToNewType(
643  IC, LI,
644  Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
645  // Replace all the stores with stores of the newly loaded value.
646  for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
647  auto *SI = cast<StoreInst>(*UI++);
648  IC.Builder.SetInsertPoint(SI);
649  combineStoreToNewValue(IC, *SI, NewLoad);
650  IC.eraseInstFromFunction(*SI);
651  }
652  assert(LI.use_empty() && "Failed to remove all users of the load!");
653  // Return the old load so the combiner can delete it safely.
654  return &LI;
655  }
656  }
657 
658  // Fold away bit casts of the loaded value by loading the desired type.
659  // We can do this for BitCastInsts as well as casts from and to pointer types,
660  // as long as those are noops (i.e., the source or dest type has the same
661  // bitwidth as the target's pointers).
662  if (LI.hasOneUse())
663  if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
664  if (CI->isNoopCast(DL))
665  if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
666  LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
667  CI->replaceAllUsesWith(NewLoad);
668  IC.eraseInstFromFunction(*CI);
669  return &LI;
670  }
671 
672  // FIXME: We should also canonicalize loads of vectors when their elements are
673  // cast to other types.
674  return nullptr;
675 }
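// Illustrative sketch of the single-use cast fold above (made-up IR):
//   %v = load i32, i32* %p
//   %f = bitcast i32 %v to float        ; the load's only user, a no-op cast
// becomes
//   %p.cast = bitcast i32* %p to float*
//   %v1 = load float, float* %p.cast
// after which the cast's uses are rewritten to %v1 and the old load dies.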
676 
677 static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
678  // FIXME: We could probably with some care handle both volatile and atomic
679  // stores here but it isn't clear that this is important.
680  if (!LI.isSimple())
681  return nullptr;
682 
683  Type *T = LI.getType();
684  if (!T->isAggregateType())
685  return nullptr;
686 
687  StringRef Name = LI.getName();
688  assert(LI.getAlignment() && "Alignment must be set at this point");
689 
690  if (auto *ST = dyn_cast<StructType>(T)) {
691  // If the struct only has one element, we unpack it.
692  auto NumElements = ST->getNumElements();
693  if (NumElements == 1) {
694  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
695  ".unpack");
696  AAMDNodes AAMD;
697  LI.getAAMetadata(AAMD);
698  NewLoad->setAAMetadata(AAMD);
699  return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
700  UndefValue::get(T), NewLoad, 0, Name));
701  }
702 
703  // We don't want to break loads with padding here as we'd lose
704  // the knowledge that padding exists for the rest of the pipeline.
705  const DataLayout &DL = IC.getDataLayout();
706  auto *SL = DL.getStructLayout(ST);
707  if (SL->hasPadding())
708  return nullptr;
709 
710  auto Align = LI.getAlignment();
711  if (!Align)
712  Align = DL.getABITypeAlignment(ST);
713 
714  auto *Addr = LI.getPointerOperand();
715  auto *IdxType = Type::getInt32Ty(T->getContext());
716  auto *Zero = ConstantInt::get(IdxType, 0);
717 
718  Value *V = UndefValue::get(T);
719  for (unsigned i = 0; i < NumElements; i++) {
720  Value *Indices[2] = {
721  Zero,
722  ConstantInt::get(IdxType, i),
723  };
724  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
725  Name + ".elt");
726  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
727  auto *L = IC.Builder.CreateAlignedLoad(ST->getElementType(i), Ptr,
728  EltAlign, Name + ".unpack");
729  // Propagate AA metadata. It'll still be valid on the narrowed load.
730  AAMDNodes AAMD;
731  LI.getAAMetadata(AAMD);
732  L->setAAMetadata(AAMD);
733  V = IC.Builder.CreateInsertValue(V, L, i);
734  }
735 
736  V->setName(Name);
737  return IC.replaceInstUsesWith(LI, V);
738  }
739 
740  if (auto *AT = dyn_cast<ArrayType>(T)) {
741  auto *ET = AT->getElementType();
742  auto NumElements = AT->getNumElements();
743  if (NumElements == 1) {
744  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
745  AAMDNodes AAMD;
746  LI.getAAMetadata(AAMD);
747  NewLoad->setAAMetadata(AAMD);
748  return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
749  UndefValue::get(T), NewLoad, 0, Name));
750  }
751 
752  // Bail out if the array is too large. Ideally we would like to optimize
753  // arrays of arbitrary size but this has a terrible impact on compile time.
754  // The threshold here is chosen arbitrarily, maybe needs a little bit of
755  // tuning.
756  if (NumElements > IC.MaxArraySizeForCombine)
757  return nullptr;
758 
759  const DataLayout &DL = IC.getDataLayout();
760  auto EltSize = DL.getTypeAllocSize(ET);
761  auto Align = LI.getAlignment();
762  if (!Align)
763  Align = DL.getABITypeAlignment(T);
764 
765  auto *Addr = LI.getPointerOperand();
766  auto *IdxType = Type::getInt64Ty(T->getContext());
767  auto *Zero = ConstantInt::get(IdxType, 0);
768 
769  Value *V = UndefValue::get(T);
770  uint64_t Offset = 0;
771  for (uint64_t i = 0; i < NumElements; i++) {
772  Value *Indices[2] = {
773  Zero,
774  ConstantInt::get(IdxType, i),
775  };
776  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
777  Name + ".elt");
778  auto *L = IC.Builder.CreateAlignedLoad(
779  AT->getElementType(), Ptr, MinAlign(Align, Offset), Name + ".unpack");
780  AAMDNodes AAMD;
781  LI.getAAMetadata(AAMD);
782  L->setAAMetadata(AAMD);
783  V = IC.Builder.CreateInsertValue(V, L, i);
784  Offset += EltSize;
785  }
786 
787  V->setName(Name);
788  return IC.replaceInstUsesWith(LI, V);
789  }
790 
791  return nullptr;
792 }
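// Illustrative sketch of the aggregate-load unpacking above (made-up IR, for
// a padding-free struct %pair = type { i32, i32 }; names are approximate):
//   %v = load %pair, %pair* %p
// becomes
//   %p.elt = getelementptr inbounds %pair, %pair* %p, i32 0, i32 0
//   %v.unpack = load i32, i32* %p.elt
//   %tmp = insertvalue %pair undef, i32 %v.unpack, 0
//   %p.elt1 = getelementptr inbounds %pair, %pair* %p, i32 0, i32 1
//   %v.unpack2 = load i32, i32* %p.elt1
//   %v1 = insertvalue %pair %tmp, i32 %v.unpack2, 1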
793 
794 // If we can determine that all possible objects pointed to by the provided
795 // pointer value are not only dereferenceable but also definitively less than
796 // or equal to the provided maximum size, then return true (constant global
797 // values and allocas fall into this category). Otherwise, return false.
798 //
799 // FIXME: This should probably live in ValueTracking (or similar).
800 static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
801  const DataLayout &DL) {
802  SmallPtrSet<Value *, 4> Visited;
803  SmallVector<Value *, 4> Worklist(1, V);
804 
805  do {
806  Value *P = Worklist.pop_back_val();
807  P = P->stripPointerCasts();
808 
809  if (!Visited.insert(P).second)
810  continue;
811 
812  if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
813  Worklist.push_back(SI->getTrueValue());
814  Worklist.push_back(SI->getFalseValue());
815  continue;
816  }
817 
818  if (PHINode *PN = dyn_cast<PHINode>(P)) {
819  for (Value *IncValue : PN->incoming_values())
820  Worklist.push_back(IncValue);
821  continue;
822  }
823 
824  if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
825  if (GA->isInterposable())
826  return false;
827  Worklist.push_back(GA->getAliasee());
828  continue;
829  }
830 
831  // If we know how big this object is, and it is less than MaxSize, continue
832  // searching. Otherwise, return false.
833  if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
834  if (!AI->getAllocatedType()->isSized())
835  return false;
836 
837  ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
838  if (!CS)
839  return false;
840 
841  uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
842  // Make sure that, even if the multiplication below would wrap as an
843  // uint64_t, we still do the right thing.
844  if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
845  return false;
846  continue;
847  }
848 
849  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
850  if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
851  return false;
852 
853  uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
854  if (InitSize > MaxSize)
855  return false;
856  continue;
857  }
858 
859  return false;
860  } while (!Worklist.empty());
861 
862  return true;
863 }
864 
865 // If we're indexing into an object of a known size, and the outer index is
866 // not a constant, but having any value but zero would lead to undefined
867 // behavior, replace it with zero.
868 //
869 // For example, if we have:
870 // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
871 // ...
872 // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
873 // ... = load i32* %arrayidx, align 4
874 // Then we know that we can replace %x in the GEP with i64 0.
875 //
876 // FIXME: We could fold any GEP index to zero that would cause UB if it were
877 // not zero. Currently, we only handle the first such index. We could also
878 // search through non-zero constant indices if we kept track of the
879 // offsets those indices implied.
880 static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
881  Instruction *MemI, unsigned &Idx) {
882  if (GEPI->getNumOperands() < 2)
883  return false;
884 
885  // Find the first non-zero index of a GEP. If all indices are zero, return
886  // one past the last index.
887  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
888  unsigned I = 1;
889  for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
890  Value *V = GEPI->getOperand(I);
891  if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
892  if (CI->isZero())
893  continue;
894 
895  break;
896  }
897 
898  return I;
899  };
900 
901  // Skip through initial 'zero' indices, and find the corresponding pointer
902  // type. See if the next index is not a constant.
903  Idx = FirstNZIdx(GEPI);
904  if (Idx == GEPI->getNumOperands())
905  return false;
906  if (isa<Constant>(GEPI->getOperand(Idx)))
907  return false;
908 
909  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
910  Type *AllocTy =
911  GetElementPtrInst::getIndexedType(GEPI->getSourceElementType(), Ops);
912  if (!AllocTy || !AllocTy->isSized())
913  return false;
914  const DataLayout &DL = IC.getDataLayout();
915  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
916 
917  // If there are more indices after the one we might replace with a zero, make
918  // sure they're all non-negative. If any of them are negative, the overall
919  // address being computed might be before the base address determined by the
920  // first non-zero index.
921  auto IsAllNonNegative = [&]() {
922  for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
923  KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
924  if (Known.isNonNegative())
925  continue;
926  return false;
927  }
928 
929  return true;
930  };
931 
932  // FIXME: If the GEP is not inbounds, and there are extra indices after the
933  // one we'll replace, those could cause the address computation to wrap
934  // (rendering the IsAllNonNegative() check below insufficient). We can do
935  // better, ignoring zero indices (and other indices we can prove small
936  // enough not to wrap).
937  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
938  return false;
939 
940  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
941  // also known to be dereferenceable.
942  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
943  IsAllNonNegative();
944 }
945 
946 // If we're indexing into an object with a variable index for the memory
947 // access, but the object has only one element, we can assume that the index
948 // will always be zero. If we replace the GEP, return it.
949 template <typename T>
950 static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
951  T &MemI) {
952  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
953  unsigned Idx;
954  if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
955  Instruction *NewGEPI = GEPI->clone();
956  NewGEPI->setOperand(Idx,
957  ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
958  NewGEPI->insertBefore(GEPI);
959  MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
960  return NewGEPI;
961  }
962  }
963 
964  return nullptr;
965 }
966 
967 static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
968  if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
969  return false;
970 
971  auto *Ptr = SI.getPointerOperand();
972  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
973  Ptr = GEPI->getOperand(0);
974  return (isa<ConstantPointerNull>(Ptr) &&
975  !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
976 }
977 
978 static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
979  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
980  const Value *GEPI0 = GEPI->getOperand(0);
981  if (isa<ConstantPointerNull>(GEPI0) &&
982  !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
983  return true;
984  }
985  if (isa<UndefValue>(Op) ||
986  (isa<ConstantPointerNull>(Op) &&
987  !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
988  return true;
989  return false;
990 }
991 
992 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
993  Value *Op = LI.getOperand(0);
994 
995  // Try to canonicalize the loaded type.
996  if (Instruction *Res = combineLoadToOperationType(*this, LI))
997  return Res;
998 
999  // Attempt to improve the alignment.
1000  unsigned KnownAlign = getOrEnforceKnownAlignment(
1001  Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT);
1002  unsigned LoadAlign = LI.getAlignment();
1003  unsigned EffectiveLoadAlign =
1004  LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
1005 
1006  if (KnownAlign > EffectiveLoadAlign)
1007  LI.setAlignment(KnownAlign);
1008  else if (LoadAlign == 0)
1009  LI.setAlignment(EffectiveLoadAlign);
1010 
1011  // Replace GEP indices if possible.
1012  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
1013  Worklist.Add(NewGEPI);
1014  return &LI;
1015  }
1016 
1017  if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1018  return Res;
1019 
1020  // Do really simple store-to-load forwarding and load CSE, to catch cases
1021  // where there are several consecutive memory accesses to the same location,
1022  // separated by a few arithmetic operations.
1023  BasicBlock::iterator BBI(LI);
1024  bool IsLoadCSE = false;
1025  if (Value *AvailableVal = FindAvailableLoadedValue(
1026  &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1027  if (IsLoadCSE)
1028  combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
1029 
1030  return replaceInstUsesWith(
1031  LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1032  LI.getName() + ".cast"));
1033  }
1034 
1035  // None of the following transforms are legal for volatile/ordered atomic
1036  // loads. Most of them do apply for unordered atomics.
1037  if (!LI.isUnordered()) return nullptr;
1038 
1039  // load(gep null, ...) -> unreachable
1040  // load null/undef -> unreachable
1041  // TODO: Consider a target hook for valid address spaces for this xforms.
1042  if (canSimplifyNullLoadOrGEP(LI, Op)) {
1043  // Insert a new store to null instruction before the load to indicate
1044  // that this code is not reachable. We do this instead of inserting
1045  // an unreachable instruction directly because we cannot modify the
1046  // CFG.
1047  StoreInst *SI = new StoreInst(UndefValue::get(LI.getType()),
1048  Constant::getNullValue(Op->getType()), &LI);
1049  SI->setDebugLoc(LI.getDebugLoc());
1050  return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
1051  }
1052 
1053  if (Op->hasOneUse()) {
1054  // Change select and PHI nodes to select values instead of addresses: this
1055  // helps alias analysis out a lot, allows many other simplifications, and
1056  // exposes redundancy in the code.
1057  //
1058  // Note that we cannot do the transformation unless we know that the
1059  // introduced loads cannot trap! Something like this is valid as long as
1060  // the condition is always false: load (select bool %C, int* null, int* %G),
1061  // but it would not be valid if we transformed it to load from null
1062  // unconditionally.
1063  //
1064  if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1065  // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1066  unsigned Align = LI.getAlignment();
1067  if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), Align,
1068  DL, SI) &&
1069  isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), Align,
1070  DL, SI)) {
1071  LoadInst *V1 =
1072  Builder.CreateLoad(LI.getType(), SI->getOperand(1),
1073  SI->getOperand(1)->getName() + ".val");
1074  LoadInst *V2 =
1075  Builder.CreateLoad(LI.getType(), SI->getOperand(2),
1076  SI->getOperand(2)->getName() + ".val");
1077  assert(LI.isUnordered() && "implied by above");
1078  V1->setAlignment(Align);
1079  V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1080  V2->setAlignment(Align);
1081  V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1082  return SelectInst::Create(SI->getCondition(), V1, V2);
1083  }
1084 
1085  // load (select (cond, null, P)) -> load P
1086  if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1087  !NullPointerIsDefined(SI->getFunction(),
1088  LI.getPointerAddressSpace())) {
1089  LI.setOperand(0, SI->getOperand(2));
1090  return &LI;
1091  }
1092 
1093  // load (select (cond, P, null)) -> load P
1094  if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1095  !NullPointerIsDefined(SI->getFunction(),
1096  LI.getPointerAddressSpace())) {
1097  LI.setOperand(0, SI->getOperand(1));
1098  return &LI;
1099  }
1100  }
1101  }
1102  return nullptr;
1103 }
1104 
1105 /// Look for extractelement/insertvalue sequence that acts like a bitcast.
1106 ///
1107 /// \returns underlying value that was "cast", or nullptr otherwise.
1108 ///
1109 /// For example, if we have:
1110 ///
1111 /// %E0 = extractelement <2 x double> %U, i32 0
1112 /// %V0 = insertvalue [2 x double] undef, double %E0, 0
1113 /// %E1 = extractelement <2 x double> %U, i32 1
1114 /// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1115 ///
1116 /// and the layout of a <2 x double> is isomorphic to a [2 x double],
1117 /// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1118 /// Note that %U may contain non-undef values where %V1 has undef.
1119 static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
1120  Value *U = nullptr;
1121  while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1122  auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1123  if (!E)
1124  return nullptr;
1125  auto *W = E->getVectorOperand();
1126  if (!U)
1127  U = W;
1128  else if (U != W)
1129  return nullptr;
1130  auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1131  if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1132  return nullptr;
1133  V = IV->getAggregateOperand();
1134  }
1135  if (!isa<UndefValue>(V) ||!U)
1136  return nullptr;
1137 
1138  auto *UT = cast<VectorType>(U->getType());
1139  auto *VT = V->getType();
1140  // Check that types UT and VT are bitwise isomorphic.
1141  const auto &DL = IC.getDataLayout();
1142  if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1143  return nullptr;
1144  }
1145  if (auto *AT = dyn_cast<ArrayType>(VT)) {
1146  if (AT->getNumElements() != UT->getNumElements())
1147  return nullptr;
1148  } else {
1149  auto *ST = cast<StructType>(VT);
1150  if (ST->getNumElements() != UT->getNumElements())
1151  return nullptr;
1152  for (const auto *EltT : ST->elements()) {
1153  if (EltT != UT->getElementType())
1154  return nullptr;
1155  }
1156  }
1157  return U;
1158 }
1159 
1160 /// Combine stores to match the type of value being stored.
1161 ///
1162 /// The core idea here is that the memory does not have any intrinsic type and
1163 /// where we can we should match the type of a store to the type of value being
1164 /// stored.
1165 ///
1166 /// However, this routine must never change the width of a store or the number of
1167 /// stores as that would introduce a semantic change. This combine is expected to
1168 /// be a semantic no-op which just allows stores to more closely model the types
1169 /// of their incoming values.
1170 ///
1171 /// Currently, we also refuse to change the precise type used for an atomic or
1172 /// volatile store. This is debatable, and might be reasonable to change later.
1173 /// However, it is risky in case some backend or other part of LLVM is relying
1174 /// on the exact type stored to select appropriate atomic operations.
1175 ///
1176 /// \returns true if the store was successfully combined away. This indicates
1177 /// the caller must erase the store instruction. We have to let the caller erase
1178 /// the store instruction as otherwise there is no way to signal whether it was
1179 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
1180 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
1181  // FIXME: We could probably with some care handle both volatile and ordered
1182  // atomic stores here but it isn't clear that this is important.
1183  if (!SI.isUnordered())
1184  return false;
1185 
1186  // swifterror values can't be bitcasted.
1187  if (SI.getPointerOperand()->isSwiftError())
1188  return false;
1189 
1190  Value *V = SI.getValueOperand();
1191 
1192  // Fold away bit casts of the stored value by storing the original type.
1193  if (auto *BC = dyn_cast<BitCastInst>(V)) {
1194  V = BC->getOperand(0);
1195  if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1196  combineStoreToNewValue(IC, SI, V);
1197  return true;
1198  }
1199  }
1200 
1201  if (Value *U = likeBitCastFromVector(IC, V))
1202  if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1203  combineStoreToNewValue(IC, SI, U);
1204  return true;
1205  }
1206 
1207  // FIXME: We should also canonicalize stores of vectors when their elements
1208  // are cast to other types.
1209  return false;
1210 }
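// Illustrative sketch of the stored-value cast fold above (made-up IR):
//   %b = bitcast float %f to i32
//   store i32 %b, i32* %p
// becomes
//   %p.cast = bitcast i32* %p to float*
//   store float %f, float* %p.cast
// so the store matches the type of the value actually being produced.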
1211 
1212 static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
1213  // FIXME: We could probably with some care handle both volatile and atomic
1214  // stores here but it isn't clear that this is important.
1215  if (!SI.isSimple())
1216  return false;
1217 
1218  Value *V = SI.getValueOperand();
1219  Type *T = V->getType();
1220 
1221  if (!T->isAggregateType())
1222  return false;
1223 
1224  if (auto *ST = dyn_cast<StructType>(T)) {
1225  // If the struct only has one element, we unpack it.
1226  unsigned Count = ST->getNumElements();
1227  if (Count == 1) {
1228  V = IC.Builder.CreateExtractValue(V, 0);
1229  combineStoreToNewValue(IC, SI, V);
1230  return true;
1231  }
1232 
1233  // We don't want to break stores with padding here as we'd lose
1234  // the knowledge that padding exists for the rest of the pipeline.
1235  const DataLayout &DL = IC.getDataLayout();
1236  auto *SL = DL.getStructLayout(ST);
1237  if (SL->hasPadding())
1238  return false;
1239 
1240  auto Align = SI.getAlignment();
1241  if (!Align)
1242  Align = DL.getABITypeAlignment(ST);
1243 
1244  SmallString<16> EltName = V->getName();
1245  EltName += ".elt";
1246  auto *Addr = SI.getPointerOperand();
1247  SmallString<16> AddrName = Addr->getName();
1248  AddrName += ".repack";
1249 
1250  auto *IdxType = Type::getInt32Ty(ST->getContext());
1251  auto *Zero = ConstantInt::get(IdxType, 0);
1252  for (unsigned i = 0; i < Count; i++) {
1253  Value *Indices[2] = {
1254  Zero,
1255  ConstantInt::get(IdxType, i),
1256  };
1257  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
1258  AddrName);
1259  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1260  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
1261  llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1262  AAMDNodes AAMD;
1263  SI.getAAMetadata(AAMD);
1264  NS->setAAMetadata(AAMD);
1265  }
1266 
1267  return true;
1268  }
1269 
1270  if (auto *AT = dyn_cast<ArrayType>(T)) {
1271  // If the array only has one element, we unpack it.
1272  auto NumElements = AT->getNumElements();
1273  if (NumElements == 1) {
1274  V = IC.Builder.CreateExtractValue(V, 0);
1275  combineStoreToNewValue(IC, SI, V);
1276  return true;
1277  }
1278 
1279  // Bail out if the array is too large. Ideally we would like to optimize
1280  // arrays of arbitrary size but this has a terrible impact on compile time.
1281  // The threshold here is chosen arbitrarily, maybe needs a little bit of
1282  // tuning.
1283  if (NumElements > IC.MaxArraySizeForCombine)
1284  return false;
1285 
1286  const DataLayout &DL = IC.getDataLayout();
1287  auto EltSize = DL.getTypeAllocSize(AT->getElementType());
1288  auto Align = SI.getAlignment();
1289  if (!Align)
1290  Align = DL.getABITypeAlignment(T);
1291 
1292  SmallString<16> EltName = V->getName();
1293  EltName += ".elt";
1294  auto *Addr = SI.getPointerOperand();
1295  SmallString<16> AddrName = Addr->getName();
1296  AddrName += ".repack";
1297 
1298  auto *IdxType = Type::getInt64Ty(T->getContext());
1299  auto *Zero = ConstantInt::get(IdxType, 0);
1300 
1301  uint64_t Offset = 0;
1302  for (uint64_t i = 0; i < NumElements; i++) {
1303  Value *Indices[2] = {
1304  Zero,
1305  ConstantInt::get(IdxType, i),
1306  };
1307  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
1308  AddrName);
1309  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1310  auto EltAlign = MinAlign(Align, Offset);
1311  Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1312  AAMDNodes AAMD;
1313  SI.getAAMetadata(AAMD);
1314  NS->setAAMetadata(AAMD);
1315  Offset += EltSize;
1316  }
1317 
1318  return true;
1319  }
1320 
1321  return false;
1322 }
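// Illustrative sketch of the aggregate-store splitting above (made-up IR, for
// %pair = type { i32, i32 } with no padding; names are approximate):
//   store %pair %v, %pair* %p
// becomes
//   %v.elt = extractvalue %pair %v, 0
//   %p.repack = getelementptr inbounds %pair, %pair* %p, i32 0, i32 0
//   store i32 %v.elt, i32* %p.repack
//   %v.elt1 = extractvalue %pair %v, 1
//   %p.repack2 = getelementptr inbounds %pair, %pair* %p, i32 0, i32 1
//   store i32 %v.elt1, i32* %p.repack2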
1323 
1324 /// equivalentAddressValues - Test if A and B will obviously have the same
1325 /// value. This includes recognizing that %t0 and %t1 will have the same
1326 /// value in code like this:
1327 /// %t0 = getelementptr \@a, 0, 3
1328 /// store i32 0, i32* %t0
1329 /// %t1 = getelementptr \@a, 0, 3
1330 /// %t2 = load i32* %t1
1331 ///
1332 static bool equivalentAddressValues(Value *A, Value *B) {
1333  // Test if the values are trivially equivalent.
1334  if (A == B) return true;
1335 
1336  // Test if the values come from identical arithmetic instructions.
1337  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1338  // it's only used to compare two uses within the same basic block, which
1339  // means that they'll always either have the same value or one of them
1340  // will have an undefined value.
1341  if (isa<BinaryOperator>(A) ||
1342  isa<CastInst>(A) ||
1343  isa<PHINode>(A) ||
1344  isa<GetElementPtrInst>(A))
1345  if (Instruction *BI = dyn_cast<Instruction>(B))
1346  if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1347  return true;
1348 
1349  // Otherwise they may not be equivalent.
1350  return false;
1351 }
1352 
1353 /// Converts store (bitcast (load (bitcast (select ...)))) to
1354 /// store (load (select ...)), where select is minmax:
1355 /// select ((cmp load V1, load V2), V1, V2).
1356 static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
1357  StoreInst &SI) {
1358  // bitcast?
1359  if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
1360  return false;
1361  // load? integer?
1362  Value *LoadAddr;
1363  if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
1364  return false;
1365  auto *LI = cast<LoadInst>(SI.getValueOperand());
1366  if (!LI->getType()->isIntegerTy())
1367  return false;
1368  if (!isMinMaxWithLoads(LoadAddr))
1369  return false;
1370 
1371  if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
1372  auto *SI = dyn_cast<StoreInst>(U);
1373  return SI && SI->getPointerOperand() != LI &&
1374  peekThroughBitcast(SI->getPointerOperand()) != LoadAddr &&
1375  !SI->getPointerOperand()->isSwiftError();
1376  }))
1377  return false;
1378 
1379  IC.Builder.SetInsertPoint(LI);
1380  LoadInst *NewLI = combineLoadToNewType(
1381  IC, *LI, LoadAddr->getType()->getPointerElementType());
1382  // Replace all the stores with stores of the newly loaded value.
1383  for (auto *UI : LI->users()) {
1384  auto *USI = cast<StoreInst>(UI);
1385  IC.Builder.SetInsertPoint(USI);
1386  combineStoreToNewValue(IC, *USI, NewLI);
1387  }
1388  IC.replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
1389  IC.eraseInstFromFunction(*LI);
1390  return true;
1391 }
1392 
1393 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
1394  Value *Val = SI.getOperand(0);
1395  Value *Ptr = SI.getOperand(1);
1396 
1397  // Try to canonicalize the stored type.
1398  if (combineStoreToValueType(*this, SI))
1399  return eraseInstFromFunction(SI);
1400 
1401  // Attempt to improve the alignment.
1402  unsigned KnownAlign = getOrEnforceKnownAlignment(
1403  Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT);
1404  unsigned StoreAlign = SI.getAlignment();
1405  unsigned EffectiveStoreAlign =
1406  StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
1407 
1408  if (KnownAlign > EffectiveStoreAlign)
1409  SI.setAlignment(KnownAlign);
1410  else if (StoreAlign == 0)
1411  SI.setAlignment(EffectiveStoreAlign);
1412 
1413  // Try to canonicalize the stored type.
1414  if (unpackStoreToAggregate(*this, SI))
1415  return eraseInstFromFunction(SI);
1416 
1417  if (removeBitcastsFromLoadStoreOnMinMax(*this, SI))
1418  return eraseInstFromFunction(SI);
1419 
1420  // Replace GEP indices if possible.
1421  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
1422  Worklist.Add(NewGEPI);
1423  return &SI;
1424  }
1425 
1426  // Don't hack volatile/ordered stores.
1427  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1428  if (!SI.isUnordered()) return nullptr;
1429 
1430  // If the RHS is an alloca with a single use, zapify the store, making the
1431  // alloca dead.
1432  if (Ptr->hasOneUse()) {
1433  if (isa<AllocaInst>(Ptr))
1434  return eraseInstFromFunction(SI);
1435  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1436  if (isa<AllocaInst>(GEP->getOperand(0))) {
1437  if (GEP->getOperand(0)->hasOneUse())
1438  return eraseInstFromFunction(SI);
1439  }
1440  }
1441  }
1442 
1443  // If we have a store to a location which is known constant, we can conclude
1444  // that the store must be storing the constant value (else the memory
1445  // wouldn't be constant), and this must be a noop.
1446  if (AA->pointsToConstantMemory(Ptr))
1447  return eraseInstFromFunction(SI);
1448 
1449  // Do really simple DSE, to catch cases where there are several consecutive
1450  // stores to the same location, separated by a few arithmetic operations. This
1451  // situation often occurs with bitfield accesses.
1452  BasicBlock::iterator BBI(SI);
1453  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1454  --ScanInsts) {
1455  --BBI;
1456  // Don't count debug info directives, lest they affect codegen,
1457  // and we skip pointer-to-pointer bitcasts, which are NOPs.
1458  if (isa<DbgInfoIntrinsic>(BBI) ||
1459  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1460  ScanInsts++;
1461  continue;
1462  }
1463 
1464  if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1465  // Prev store isn't volatile, and stores to the same location?
1466  if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
1467  SI.getOperand(1))) {
1468  ++NumDeadStore;
1469  ++BBI;
1470  eraseInstFromFunction(*PrevSI);
1471  continue;
1472  }
1473  break;
1474  }
1475 
1476  // If this is a load, we have to stop. However, if the loaded value is from
1477  // the pointer we're loading and is producing the pointer we're storing,
1478  // then *this* store is dead (X = load P; store X -> P).
1479  if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1480  if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1481  assert(SI.isUnordered() && "can't eliminate ordering operation");
1482  return eraseInstFromFunction(SI);
1483  }
1484 
1485  // Otherwise, this is a load from some other location. Stores before it
1486  // may not be dead.
1487  break;
1488  }
1489 
1490  // Don't skip over loads, throws or things that can modify memory.
1491  if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1492  break;
1493  }
1494 
1495  // store X, null -> turns into 'unreachable' in SimplifyCFG
1496  // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1497  if (canSimplifyNullStoreOrGEP(SI)) {
1498  if (!isa<UndefValue>(Val)) {
1499  SI.setOperand(0, UndefValue::get(Val->getType()));
1500  if (Instruction *U = dyn_cast<Instruction>(Val))
1501  Worklist.Add(U); // Dropped a use.
1502  }
1503  return nullptr; // Do not modify these!
1504  }
1505 
1506  // store undef, Ptr -> noop
1507  if (isa<UndefValue>(Val))
1508  return eraseInstFromFunction(SI);
1509 
1510  // If this store is the second-to-last instruction in the basic block
1511  // (excluding debug info and bitcasts of pointers) and if the block ends with
1512  // an unconditional branch, try to move the store to the successor block.
1513  BBI = SI.getIterator();
1514  do {
1515  ++BBI;
1516  } while (isa<DbgInfoIntrinsic>(BBI) ||
1517  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
1518 
1519  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
1520  if (BI->isUnconditional())
1521  mergeStoreIntoSuccessor(SI);
1522 
1523  return nullptr;
1524 }
1525 
1526 /// Try to transform:
1527 /// if () { *P = v1; } else { *P = v2 }
1528 /// or:
1529 /// *P = v1; if () { *P = v2; }
1530 /// into a phi node with a store in the successor.
1531 bool InstCombiner::mergeStoreIntoSuccessor(StoreInst &SI) {
1532  assert(SI.isUnordered() &&
1533  "This code has not been audited for volatile or ordered store case.");
1534 
1535  // Check if the successor block has exactly 2 incoming edges.
1536  BasicBlock *StoreBB = SI.getParent();
1537  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1538  if (!DestBB->hasNPredecessors(2))
1539  return false;
1540 
1541  // Capture the other block (the block that doesn't contain our store).
1542  pred_iterator PredIter = pred_begin(DestBB);
1543  if (*PredIter == StoreBB)
1544  ++PredIter;
1545  BasicBlock *OtherBB = *PredIter;
1546 
1547  // Bail out if all of the relevant blocks aren't distinct. This can happen,
1548  // for example, if SI is in an infinite loop.
1549  if (StoreBB == DestBB || OtherBB == DestBB)
1550  return false;
1551 
1552  // Verify that the other block ends in a branch and is not otherwise empty.
1553  BasicBlock::iterator BBI(OtherBB->getTerminator());
1554  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1555  if (!OtherBr || BBI == OtherBB->begin())
1556  return false;
1557 
1558  // If the other block ends in an unconditional branch, check for the 'if then
1559  // else' case. There is an instruction before the branch.
1560  StoreInst *OtherStore = nullptr;
1561  if (OtherBr->isUnconditional()) {
1562  --BBI;
1563  // Skip over debugging info.
1564  while (isa<DbgInfoIntrinsic>(BBI) ||
1565  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1566  if (BBI==OtherBB->begin())
1567  return false;
1568  --BBI;
1569  }
1570  // If this isn't a store, isn't a store to the same location, or is not the
1571  // right kind of store, bail out.
1572  OtherStore = dyn_cast<StoreInst>(BBI);
1573  if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
1574  !SI.isSameOperationAs(OtherStore))
1575  return false;
1576  } else {
1577  // Otherwise, the other block ended with a conditional branch. If one of the
1578  // destinations is StoreBB, then we have the if/then case.
1579  if (OtherBr->getSuccessor(0) != StoreBB &&
1580  OtherBr->getSuccessor(1) != StoreBB)
1581  return false;
1582 
1583  // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1584  // if/then triangle. See if there is a store to the same ptr as SI that
1585  // lives in OtherBB.
1586  for (;; --BBI) {
1587  // Check to see if we find the matching store.
1588  if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
1589  if (OtherStore->getOperand(1) != SI.getOperand(1) ||
1590  !SI.isSameOperationAs(OtherStore))
1591  return false;
1592  break;
1593  }
1594  // If we find something that may be using or overwriting the stored
1595  // value, or if we run out of instructions, we can't do the transform.
1596  if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1597  BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1598  return false;
1599  }
1600 
1601  // In order to eliminate the store in OtherBB, we have to make sure nothing
1602  // reads or overwrites the stored value in StoreBB.
1603  for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1604  // FIXME: This should really be AA driven.
1605  if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1606  return false;
1607  }
1608  }
1609 
1610  // Insert a PHI node now if we need it.
1611  Value *MergedVal = OtherStore->getOperand(0);
1612  // The debug locations of the original instructions might differ. Merge them.
1613  DILocation *MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
1614  OtherStore->getDebugLoc());
1615  if (MergedVal != SI.getOperand(0)) {
1616  PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
1617  PN->addIncoming(SI.getOperand(0), SI.getParent());
1618  PN->addIncoming(OtherStore->getOperand(0), OtherBB);
1619  MergedVal = InsertNewInstBefore(PN, DestBB->front());
1620  PN->setDebugLoc(MergedLoc);
1621  }
1622 
1623  // Advance to a place where it is safe to insert the new store and insert it.
1624  BBI = DestBB->getFirstInsertionPt();
1625  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
1626  SI.isVolatile(), SI.getAlignment(),
1627  SI.getOrdering(), SI.getSyncScopeID());
1628  InsertNewInstBefore(NewSI, *BBI);
1629  NewSI->setDebugLoc(MergedLoc);
1630 
1631  // If the two stores had AA tags, merge them.
1632  AAMDNodes AATags;
1633  SI.getAAMetadata(AATags);
1634  if (AATags) {
1635  OtherStore->getAAMetadata(AATags, /* Merge = */ true);
1636  NewSI->setAAMetadata(AATags);
1637  }
1638 
1639  // Nuke the old stores.
1640  eraseInstFromFunction(SI);
1641  eraseInstFromFunction(*OtherStore);
1642  return true;
1643 }