1 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the visit functions for load, store and alloca.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "InstCombineInternal.h"
14 #include "llvm/ADT/MapVector.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/Loads.h"
19 #include "llvm/IR/ConstantRange.h"
20 #include "llvm/IR/DataLayout.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/LLVMContext.h"
24 #include "llvm/IR/MDBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
27 using namespace llvm;
28 using namespace PatternMatch;
29 
30 #define DEBUG_TYPE "instcombine"
31 
32 STATISTIC(NumDeadStore, "Number of dead stores eliminated");
33 STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
34 
35 /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
36 /// some part of a constant global variable. This intentionally only accepts
37 /// constant expressions because we can't rewrite arbitrary instructions.
38 static bool pointsToConstantGlobal(Value *V) {
39  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
40  return GV->isConstant();
41 
42  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
43  if (CE->getOpcode() == Instruction::BitCast ||
44  CE->getOpcode() == Instruction::AddrSpaceCast ||
45  CE->getOpcode() == Instruction::GetElementPtr)
46  return pointsToConstantGlobal(CE->getOperand(0));
47  }
48  return false;
49 }
50 
51 /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
52 /// pointer to an alloca. Ignore any reads of the pointer, return false if we
53 /// see any stores or other unknown uses. If we see pointer arithmetic, keep
54 /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
55 /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
56 /// the alloca, and if the source pointer is a pointer to a constant global, we
57 /// can optimize this.
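// For illustration, the shape being searched for is roughly (invented IR,
// not taken from a test):
//
//   @G = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
//   %A = alloca [4 x i32]
//   %p = bitcast [4 x i32]* %A to i8*
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p,
//            i8* bitcast ([4 x i32]* @G to i8*), i64 16, i1 false)
//   ... only reads of %A afterwards ...
//
// in which case the reads of %A can be redirected to @G.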
58 static bool
59 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
60                                SmallVectorImpl<Instruction *> &ToDelete) {
61  // We track lifetime intrinsics as we encounter them. If we decide to go
62  // ahead and replace the value with the global, this lets the caller quickly
63  // eliminate the markers.
64 
65  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
66  ValuesToInspect.emplace_back(V, false);
67  while (!ValuesToInspect.empty()) {
68  auto ValuePair = ValuesToInspect.pop_back_val();
69  const bool IsOffset = ValuePair.second;
70  for (auto &U : ValuePair.first->uses()) {
71  auto *I = cast<Instruction>(U.getUser());
72 
73  if (auto *LI = dyn_cast<LoadInst>(I)) {
74  // Ignore non-volatile loads, they are always ok.
75  if (!LI->isSimple()) return false;
76  continue;
77  }
78 
79  if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
80  // If uses of the bitcast are ok, we are ok.
81  ValuesToInspect.emplace_back(I, IsOffset);
82  continue;
83  }
84  if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
85  // If the GEP has all zero indices, it doesn't offset the pointer;
86  // otherwise, it does.
87  ValuesToInspect.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
88  continue;
89  }
90 
91  if (auto *Call = dyn_cast<CallBase>(I)) {
92  // If this is the function being called then we treat it like a load and
93  // ignore it.
94  if (Call->isCallee(&U))
95  continue;
96 
97  unsigned DataOpNo = Call->getDataOperandNo(&U);
98  bool IsArgOperand = Call->isArgOperand(&U);
99 
100  // Inalloca arguments are clobbered by the call.
101  if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
102  return false;
103 
104  // If this is a readonly/readnone call site, then we know it is just a
105  // load (but one that potentially returns the value itself), so we can
106  // ignore it if we know that the value isn't captured.
107  if (Call->onlyReadsMemory() &&
108  (Call->use_empty() || Call->doesNotCapture(DataOpNo)))
109  continue;
110 
111  // If this is being passed as a byval argument, the caller is making a
112  // copy, so it is only a read of the alloca.
113  if (IsArgOperand && Call->isByValArgument(DataOpNo))
114  continue;
115  }
116 
117  // Lifetime intrinsics can be handled by the caller.
118  if (I->isLifetimeStartOrEnd()) {
119  assert(I->use_empty() && "Lifetime markers have no result to use!");
120  ToDelete.push_back(I);
121  continue;
122  }
123 
124  // If this isn't our memcpy/memmove, reject it as something we can't
125  // handle.
126  MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
127  if (!MI)
128  return false;
129 
130  // If the transfer is using the alloca as a source of the transfer, then
131  // ignore it since it is a load (unless the transfer is volatile).
132  if (U.getOperandNo() == 1) {
133  if (MI->isVolatile()) return false;
134  continue;
135  }
136 
137  // If we already have seen a copy, reject the second one.
138  if (TheCopy) return false;
139 
140  // If the pointer has been offset from the start of the alloca, we can't
141  // safely handle this.
142  if (IsOffset) return false;
143 
144  // If the memintrinsic isn't using the alloca as the dest, reject it.
145  if (U.getOperandNo() != 0) return false;
146 
147  // If the source of the memcpy/move is not a constant global, reject it.
148  if (!pointsToConstantGlobal(MI->getSource()))
149  return false;
150 
151  // Otherwise, the transform is safe. Remember the copy instruction.
152  TheCopy = MI;
153  }
154  }
155  return true;
156 }
157 
158 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
159 /// modified by a copy from a constant global. If we can prove this, we can
160 /// replace any uses of the alloca with uses of the global directly.
161 static MemTransferInst *
162 isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
163  SmallVectorImpl<Instruction *> &ToDelete) {
164  MemTransferInst *TheCopy = nullptr;
165  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
166  return TheCopy;
167  return nullptr;
168 }
169 
170 /// Returns true if V is dereferenceable for the size of the alloca.
171 static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
172  const DataLayout &DL) {
173  if (AI->isArrayAllocation())
174  return false;
175  uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
176  if (!AllocaSize)
177  return false;
178  return isDereferenceableAndAlignedPointer(V, AI->getAlignment(),
179  APInt(64, AllocaSize), DL);
180 }
181 
182 static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
183   // Check for array size of 1 (scalar allocation).
184  if (!AI.isArrayAllocation()) {
185  // i32 1 is the canonical array size for scalar allocations.
186  if (AI.getArraySize()->getType()->isIntegerTy(32))
187  return nullptr;
188 
189  // Canonicalize it.
190  Value *V = IC.Builder.getInt32(1);
191  AI.setOperand(0, V);
192  return &AI;
193  }
194 
195  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
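  // For example (illustrative, not from a test):
  //   %a = alloca i32, i32 4
  // becomes
  //   %a1 = alloca [4 x i32]
  //   %a1.sub = getelementptr inbounds [4 x i32], [4 x i32]* %a1, i64 0, i64 0
  // with all uses of %a rewritten to use %a1.sub.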
196  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
197  if (C->getValue().getActiveBits() <= 64) {
198  Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
199  AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
200  New->setAlignment(AI.getAlignment());
201 
202  // Scan to the end of the allocation instructions, to skip over a block of
203  // allocas if possible...also skip interleaved debug info
204  //
205  BasicBlock::iterator It(New);
206  while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
207  ++It;
208 
209  // Now that It is pointing to the first non-allocation-inst in the block,
210  // insert our getelementptr instruction...
211  //
212  Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
213  Value *NullIdx = Constant::getNullValue(IdxTy);
214  Value *Idx[2] = {NullIdx, NullIdx};
215  Instruction *GEP = GetElementPtrInst::CreateInBounds(
216      NewTy, New, Idx, New->getName() + ".sub");
217  IC.InsertNewInstBefore(GEP, *It);
218 
219  // Now make everything use the getelementptr instead of the original
220  // allocation.
221  return IC.replaceInstUsesWith(AI, GEP);
222  }
223  }
224 
225  if (isa<UndefValue>(AI.getArraySize()))
226    return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
227 
228  // Ensure that the alloca array size argument has type intptr_t, so that
229  // any casting is exposed early.
230  Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
231  if (AI.getArraySize()->getType() != IntPtrTy) {
232  Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
233  AI.setOperand(0, V);
234  return &AI;
235  }
236 
237  return nullptr;
238 }
239 
240 namespace {
241 // If I and V are pointers in different address spaces, it is not allowed to
242 // use replaceAllUsesWith since I and V have different types. A
243 // non-target-specific transformation should not use addrspacecast on V since
244 // the two address spaces may be disjoint depending on the target.
245 //
246 // This class chases down uses of the old pointer until reaching the load
247 // instructions, then replaces the old pointer in the load instructions with
248 // the new pointer. If during the chasing it sees bitcast or GEP, it will
249 // create new bitcast or GEP with the new pointer and use them in the load
250 // instruction.
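// A rough sketch of the intent (invented IR, made-up address spaces): if an
// alloca %a in addrspace(0) is being replaced by a global @g in addrspace(4),
// then a use chain such as
//   %b = bitcast i32* %a to i8*
//   %v = load i8, i8* %b
// is rebuilt around the new pointer as
//   %b.new = bitcast i32 addrspace(4)* @g to i8 addrspace(4)*
//   %v     = load i8, i8 addrspace(4)* %b.new
// instead of inserting an addrspacecast of @g.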
251 class PointerReplacer {
252 public:
253  PointerReplacer(InstCombiner &IC) : IC(IC) {}
254  void replacePointer(Instruction &I, Value *V);
255 
256 private:
257  void findLoadAndReplace(Instruction &I);
258  void replace(Instruction *I);
259  Value *getReplacement(Value *I);
260 
261  SmallVector<Instruction *, 4> Path;
262  MapVector<Value *, Value *> WorkMap;
263  InstCombiner &IC;
264 };
265 } // end anonymous namespace
266 
267 void PointerReplacer::findLoadAndReplace(Instruction &I) {
268  for (auto U : I.users()) {
269  auto *Inst = dyn_cast<Instruction>(&*U);
270  if (!Inst)
271  return;
272  LLVM_DEBUG(dbgs() << "Found pointer user: " << *U << '\n');
273  if (isa<LoadInst>(Inst)) {
274  for (auto P : Path)
275  replace(P);
276  replace(Inst);
277  } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
278  Path.push_back(Inst);
279  findLoadAndReplace(*Inst);
280  Path.pop_back();
281  } else {
282  return;
283  }
284  }
285 }
286 
287 Value *PointerReplacer::getReplacement(Value *V) {
288  auto Loc = WorkMap.find(V);
289  if (Loc != WorkMap.end())
290  return Loc->second;
291  return nullptr;
292 }
293 
294 void PointerReplacer::replace(Instruction *I) {
295   if (getReplacement(I))
296  return;
297 
298  if (auto *LT = dyn_cast<LoadInst>(I)) {
299  auto *V = getReplacement(LT->getPointerOperand());
300  assert(V && "Operand not replaced");
301  auto *NewI = new LoadInst(I->getType(), V);
302  NewI->takeName(LT);
303  IC.InsertNewInstWith(NewI, *LT);
304  IC.replaceInstUsesWith(*LT, NewI);
305  WorkMap[LT] = NewI;
306  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
307  auto *V = getReplacement(GEP->getPointerOperand());
308  assert(V && "Operand not replaced");
309  SmallVector<Value *, 8> Indices;
310  Indices.append(GEP->idx_begin(), GEP->idx_end());
311  auto *NewI = GetElementPtrInst::Create(
312  V->getType()->getPointerElementType(), V, Indices);
313  IC.InsertNewInstWith(NewI, *GEP);
314  NewI->takeName(GEP);
315  WorkMap[GEP] = NewI;
316  } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
317  auto *V = getReplacement(BC->getOperand(0));
318  assert(V && "Operand not replaced");
319    auto *NewT = PointerType::get(BC->getType()->getPointerElementType(),
320                                  V->getType()->getPointerAddressSpace());
321    auto *NewI = new BitCastInst(V, NewT);
322  IC.InsertNewInstWith(NewI, *BC);
323  NewI->takeName(BC);
324  WorkMap[BC] = NewI;
325  } else {
326  llvm_unreachable("should never reach here");
327  }
328 }
329 
330 void PointerReplacer::replacePointer(Instruction &I, Value *V) {
331 #ifndef NDEBUG
332  auto *PT = cast<PointerType>(I.getType());
333  auto *NT = cast<PointerType>(V->getType());
334  assert(PT != NT && PT->getElementType() == NT->getElementType() &&
335  "Invalid usage");
336 #endif
337  WorkMap[&I] = V;
338  findLoadAndReplace(I);
339 }
340 
341 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
342   if (auto *I = simplifyAllocaArraySize(*this, AI))
343  return I;
344 
345  if (AI.getAllocatedType()->isSized()) {
346  // If the alignment is 0 (unspecified), assign it the preferred alignment.
347  if (AI.getAlignment() == 0)
348  AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
349 
350  // Move all alloca's of zero byte objects to the entry block and merge them
351  // together. Note that we only do this for alloca's, because malloc should
352  // allocate and return a unique pointer, even for a zero byte allocation.
353  if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
354  // For a zero sized alloca there is no point in doing an array allocation.
355  // This is helpful if the array size is a complicated expression not used
356  // elsewhere.
357  if (AI.isArrayAllocation()) {
358  AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
359  return &AI;
360  }
361 
362  // Get the first instruction in the entry block.
363  BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
364  Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
365  if (FirstInst != &AI) {
366  // If the entry block doesn't start with a zero-size alloca then move
367  // this one to the start of the entry block. There is no problem with
368  // dominance as the array size was forced to a constant earlier already.
369  AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
370  if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
371  DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
372  AI.moveBefore(FirstInst);
373  return &AI;
374  }
375 
376  // If the alignment of the entry block alloca is 0 (unspecified),
377  // assign it the preferred alignment.
378  if (EntryAI->getAlignment() == 0)
379  EntryAI->setAlignment(
380  DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
381  // Replace this zero-sized alloca with the one at the start of the entry
382  // block after ensuring that the address will be aligned enough for both
383  // types.
384  unsigned MaxAlign = std::max(EntryAI->getAlignment(),
385  AI.getAlignment());
386  EntryAI->setAlignment(MaxAlign);
387  if (AI.getType() != EntryAI->getType())
388  return new BitCastInst(EntryAI, AI.getType());
389  return replaceInstUsesWith(AI, EntryAI);
390  }
391  }
392  }
393 
394  if (AI.getAlignment()) {
395  // Check to see if this allocation is only modified by a memcpy/memmove from
396  // a constant global whose alignment is equal to or exceeds that of the
397  // allocation. If this is the case, we can change all users to use
398  // the constant global instead. This is commonly produced by the CFE by
399  // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
400  // is only subsequently read.
401    SmallVector<Instruction *, 4> ToDelete;
402    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
403  unsigned SourceAlign = getOrEnforceKnownAlignment(
404  Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
405  if (AI.getAlignment() <= SourceAlign &&
406  isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
407  LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
408  LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
409  for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
410  eraseInstFromFunction(*ToDelete[i]);
411  Constant *TheSrc = cast<Constant>(Copy->getSource());
412  auto *SrcTy = TheSrc->getType();
413  auto *DestTy = PointerType::get(AI.getType()->getPointerElementType(),
414  SrcTy->getPointerAddressSpace());
415        Constant *Cast =
416            ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, DestTy);
417  if (AI.getType()->getPointerAddressSpace() ==
418  SrcTy->getPointerAddressSpace()) {
419  Instruction *NewI = replaceInstUsesWith(AI, Cast);
420  eraseInstFromFunction(*Copy);
421  ++NumGlobalCopies;
422  return NewI;
423  } else {
424  PointerReplacer PtrReplacer(*this);
425  PtrReplacer.replacePointer(AI, Cast);
426  ++NumGlobalCopies;
427  }
428  }
429  }
430  }
431 
432  // At last, use the generic allocation site handler to aggressively remove
433  // unused allocas.
434  return visitAllocSite(AI);
435 }
436 
437 // Are we allowed to form an atomic load or store of this type?
438 static bool isSupportedAtomicType(Type *Ty) {
439  return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
440 }
441 
442 /// Helper to combine a load to a new type.
443 ///
444 /// This just does the work of combining a load to a new type. It handles
445 /// metadata, etc., and returns the new instruction. The \c NewTy should be the
446 /// loaded *value* type. This will convert it to a pointer, cast the operand to
447 /// that pointer type, load it, etc.
448 ///
449 /// Note that this will create all of the instructions with whatever insert
450 /// point the \c InstCombiner currently is using.
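///
/// For instance (illustrative only, not from a test), asking for NewTy == float
/// on
///   %x = load i32, i32* %p
/// produces roughly
///   %p.cast = bitcast i32* %p to float*
///   %x.f    = load float, float* %p.cast
/// with the original load's metadata carried over where it still applies.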
451 static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
452                                       const Twine &Suffix = "") {
453  assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
454  "can't fold an atomic load to requested type");
455 
456  Value *Ptr = LI.getPointerOperand();
457  unsigned AS = LI.getPointerAddressSpace();
458   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
459   LI.getAllMetadata(MD);
460 
461  Value *NewPtr = nullptr;
462  if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
463  NewPtr->getType()->getPointerElementType() == NewTy &&
464  NewPtr->getType()->getPointerAddressSpace() == AS))
465  NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
466 
467  LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
468  NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
469  NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
470  MDBuilder MDB(NewLoad->getContext());
471  for (const auto &MDPair : MD) {
472  unsigned ID = MDPair.first;
473  MDNode *N = MDPair.second;
474  // Note, essentially every kind of metadata should be preserved here! This
475  // routine is supposed to clone a load instruction changing *only its type*.
476  // The only metadata it makes sense to drop is metadata which is invalidated
477  // when the pointer type changes. This should essentially never be the case
478  // in LLVM, but we explicitly switch over only known metadata to be
479  // conservatively correct. If you are adding metadata to LLVM which pertains
480  // to loads, you almost certainly want to add it here.
481  switch (ID) {
482     case LLVMContext::MD_dbg:
483     case LLVMContext::MD_tbaa:
484     case LLVMContext::MD_prof:
485     case LLVMContext::MD_fpmath:
486     case LLVMContext::MD_tbaa_struct:
487     case LLVMContext::MD_invariant_load:
488     case LLVMContext::MD_alias_scope:
489     case LLVMContext::MD_noalias:
490     case LLVMContext::MD_nontemporal:
491     case LLVMContext::MD_mem_parallel_loop_access:
492     case LLVMContext::MD_access_group:
493  // All of these directly apply.
494  NewLoad->setMetadata(ID, N);
495  break;
496 
497     case LLVMContext::MD_nonnull:
498       copyNonnullMetadata(LI, N, *NewLoad);
499  break;
500     case LLVMContext::MD_align:
501     case LLVMContext::MD_dereferenceable:
502     case LLVMContext::MD_dereferenceable_or_null:
503       // These only directly apply if the new type is also a pointer.
504  if (NewTy->isPointerTy())
505  NewLoad->setMetadata(ID, N);
506  break;
507     case LLVMContext::MD_range:
508       copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
509  break;
510  }
511  }
512  return NewLoad;
513 }
514 
515 /// Combine a store to a new type.
516 ///
517 /// Returns the newly created store instruction.
518 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
519   assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
520  "can't fold an atomic store of requested type");
521 
522  Value *Ptr = SI.getPointerOperand();
523  unsigned AS = SI.getPointerAddressSpace();
524   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
525   SI.getAllMetadata(MD);
526 
527  StoreInst *NewStore = IC.Builder.CreateAlignedStore(
528  V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
529  SI.getAlignment(), SI.isVolatile());
530  NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
531  for (const auto &MDPair : MD) {
532  unsigned ID = MDPair.first;
533  MDNode *N = MDPair.second;
534  // Note, essentially every kind of metadata should be preserved here! This
535  // routine is supposed to clone a store instruction changing *only its
536  // type*. The only metadata it makes sense to drop is metadata which is
537  // invalidated when the pointer type changes. This should essentially
538  // never be the case in LLVM, but we explicitly switch over only known
539  // metadata to be conservatively correct. If you are adding metadata to
540  // LLVM which pertains to stores, you almost certainly want to add it
541  // here.
542  switch (ID) {
543     case LLVMContext::MD_dbg:
544     case LLVMContext::MD_tbaa:
545     case LLVMContext::MD_prof:
546     case LLVMContext::MD_fpmath:
547     case LLVMContext::MD_tbaa_struct:
548     case LLVMContext::MD_alias_scope:
549     case LLVMContext::MD_noalias:
550     case LLVMContext::MD_nontemporal:
551     case LLVMContext::MD_mem_parallel_loop_access:
552     case LLVMContext::MD_access_group:
553  // All of these directly apply.
554  NewStore->setMetadata(ID, N);
555  break;
556     case LLVMContext::MD_invariant_load:
557     case LLVMContext::MD_nonnull:
558     case LLVMContext::MD_range:
559     case LLVMContext::MD_align:
560     case LLVMContext::MD_dereferenceable:
561     case LLVMContext::MD_dereferenceable_or_null:
562       // These don't apply for stores.
563  break;
564  }
565  }
566 
567  return NewStore;
568 }
569 
570 /// Returns true if the instruction represents a minmax pattern like:
571 /// select ((cmp load V1, load V2), V1, V2).
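/// An illustrative instance of that pattern (invented IR):
///   %l1 = load i32, i32* %p1
///   %l2 = load i32, i32* %p2
///   %c  = icmp slt i32 %l1, %l2
///   %v  = select i1 %c, i32* %p1, i32* %p2    ; V1 = %p1, V2 = %p2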
572 static bool isMinMaxWithLoads(Value *V) {
573  assert(V->getType()->isPointerTy() && "Expected pointer type.");
574  // Ignore possible ty* to ixx* bitcast.
575  V = peekThroughBitcast(V);
576  // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
577  // pattern.
578  CmpInst::Predicate Pred;
579  Instruction *L1;
580  Instruction *L2;
581  Value *LHS;
582  Value *RHS;
583  if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
584  m_Value(LHS), m_Value(RHS))))
585  return false;
586  return (match(L1, m_Load(m_Specific(LHS))) &&
587  match(L2, m_Load(m_Specific(RHS)))) ||
588  (match(L1, m_Load(m_Specific(RHS))) &&
589  match(L2, m_Load(m_Specific(LHS))));
590 }
591 
592 /// Combine loads to match the type of their uses' value after looking
593 /// through intervening bitcasts.
594 ///
595 /// The core idea here is that if the result of a load is used in an operation,
596 /// we should load the type most conducive to that operation. For example, when
597 /// loading an integer and converting that immediately to a pointer, we should
598 /// instead directly load a pointer.
599 ///
600 /// However, this routine must never change the width of a load or the number of
601 /// loads as that would introduce a semantic change. This combine is expected to
602 /// be a semantic no-op which just allows loads to more closely model the types
603 /// of their consuming operations.
604 ///
605 /// Currently, we also refuse to change the precise type used for an atomic load
606 /// or a volatile load. This is debatable, and might be reasonable to change
607 /// later. However, it is risky in case some backend or other part of LLVM is
608 /// relying on the exact type loaded to select appropriate atomic operations.
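///
/// As a made-up example of the bitcast folding below, on a target with 64-bit
/// pointers
///   %i = load i64, i64* %p
///   %q = inttoptr i64 %i to i8*
/// can be rewritten to load the pointer type directly:
///   %p.cast = bitcast i64* %p to i8**
///   %q      = load i8*, i8** %p.cast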
609 static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
610   // FIXME: We could probably with some care handle both volatile and ordered
611  // atomic loads here but it isn't clear that this is important.
612  if (!LI.isUnordered())
613  return nullptr;
614 
615  if (LI.use_empty())
616  return nullptr;
617 
618  // swifterror values can't be bitcasted.
619  if (LI.getPointerOperand()->isSwiftError())
620  return nullptr;
621 
622  Type *Ty = LI.getType();
623  const DataLayout &DL = IC.getDataLayout();
624 
625  // Try to canonicalize loads which are only ever stored to operate over
626  // integers instead of any other type. We only do this when the loaded type
627  // is sized and has a size exactly the same as its store size and the store
628  // size is a legal integer type.
629  // Do not perform canonicalization if minmax pattern is found (to avoid
630  // infinite loop).
631   if (!Ty->isIntegerTy() && Ty->isSized() &&
632       DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
633       DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty) &&
634       !DL.isNonIntegralPointerType(Ty) &&
635       !isMinMaxWithLoads(
636           peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
637  if (all_of(LI.users(), [&LI](User *U) {
638  auto *SI = dyn_cast<StoreInst>(U);
639  return SI && SI->getPointerOperand() != &LI &&
640  !SI->getPointerOperand()->isSwiftError();
641  })) {
642  LoadInst *NewLoad = combineLoadToNewType(
643         IC, LI,
644         Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
645  // Replace all the stores with stores of the newly loaded value.
646  for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
647         auto *SI = cast<StoreInst>(*UI++);
648         IC.Builder.SetInsertPoint(SI);
649         combineStoreToNewValue(IC, *SI, NewLoad);
650         IC.eraseInstFromFunction(*SI);
651       }
652  assert(LI.use_empty() && "Failed to remove all users of the load!");
653  // Return the old load so the combiner can delete it safely.
654  return &LI;
655  }
656  }
657 
658  // Fold away bit casts of the loaded value by loading the desired type.
659  // We can do this for BitCastInsts as well as casts from and to pointer types,
660  // as long as those are noops (i.e., the source or dest type have the same
661  // bitwidth as the target's pointers).
662  if (LI.hasOneUse())
663  if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
664  if (CI->isNoopCast(DL))
665  if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
666  LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
667  CI->replaceAllUsesWith(NewLoad);
668  IC.eraseInstFromFunction(*CI);
669  return &LI;
670  }
671 
672  // FIXME: We should also canonicalize loads of vectors when their elements are
673  // cast to other types.
674  return nullptr;
675 }
676 
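// unpackLoadToAggregate (below) splits a load of a small aggregate into loads
// of its elements, e.g. (sketch, invented IR):
//   %s = load { i32, i32 }, { i32, i32 }* %p
// becomes, roughly,
//   %p0 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 0
//   %e0 = load i32, i32* %p0
//   %p1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 1
//   %e1 = load i32, i32* %p1
// with %s rebuilt element by element via insertvalue.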
677 static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
678   // FIXME: We could probably with some care handle both volatile and atomic
679  // stores here but it isn't clear that this is important.
680  if (!LI.isSimple())
681  return nullptr;
682 
683  Type *T = LI.getType();
684  if (!T->isAggregateType())
685  return nullptr;
686 
687  StringRef Name = LI.getName();
688  assert(LI.getAlignment() && "Alignment must be set at this point");
689 
690  if (auto *ST = dyn_cast<StructType>(T)) {
691     // If the struct has only one element, we unpack.
692  auto NumElements = ST->getNumElements();
693  if (NumElements == 1) {
694  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
695  ".unpack");
696  AAMDNodes AAMD;
697  LI.getAAMetadata(AAMD);
698  NewLoad->setAAMetadata(AAMD);
699       return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
700         UndefValue::get(T), NewLoad, 0, Name));
701  }
702 
703     // We don't want to break loads with padding here as we'd lose
704  // the knowledge that padding exists for the rest of the pipeline.
705  const DataLayout &DL = IC.getDataLayout();
706  auto *SL = DL.getStructLayout(ST);
707  if (SL->hasPadding())
708  return nullptr;
709 
710  auto Align = LI.getAlignment();
711     if (!Align)
712       Align = DL.getABITypeAlignment(ST);
713 
714  auto *Addr = LI.getPointerOperand();
715  auto *IdxType = Type::getInt32Ty(T->getContext());
716  auto *Zero = ConstantInt::get(IdxType, 0);
717 
718  Value *V = UndefValue::get(T);
719  for (unsigned i = 0; i < NumElements; i++) {
720  Value *Indices[2] = {
721  Zero,
722  ConstantInt::get(IdxType, i),
723  };
724  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
725  Name + ".elt");
726  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
727  auto *L = IC.Builder.CreateAlignedLoad(ST->getElementType(i), Ptr,
728  EltAlign, Name + ".unpack");
729  // Propagate AA metadata. It'll still be valid on the narrowed load.
730  AAMDNodes AAMD;
731  LI.getAAMetadata(AAMD);
732  L->setAAMetadata(AAMD);
733  V = IC.Builder.CreateInsertValue(V, L, i);
734  }
735 
736  V->setName(Name);
737  return IC.replaceInstUsesWith(LI, V);
738  }
739 
740  if (auto *AT = dyn_cast<ArrayType>(T)) {
741  auto *ET = AT->getElementType();
742  auto NumElements = AT->getNumElements();
743  if (NumElements == 1) {
744  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
745  AAMDNodes AAMD;
746  LI.getAAMetadata(AAMD);
747  NewLoad->setAAMetadata(AAMD);
748       return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
749         UndefValue::get(T), NewLoad, 0, Name));
750  }
751 
752  // Bail out if the array is too large. Ideally we would like to optimize
753  // arrays of arbitrary size but this has a terrible impact on compile time.
754  // The threshold here is chosen arbitrarily, maybe needs a little bit of
755  // tuning.
756  if (NumElements > IC.MaxArraySizeForCombine)
757  return nullptr;
758 
759  const DataLayout &DL = IC.getDataLayout();
760  auto EltSize = DL.getTypeAllocSize(ET);
761  auto Align = LI.getAlignment();
762  if (!Align)
763  Align = DL.getABITypeAlignment(T);
764 
765  auto *Addr = LI.getPointerOperand();
766  auto *IdxType = Type::getInt64Ty(T->getContext());
767  auto *Zero = ConstantInt::get(IdxType, 0);
768 
769  Value *V = UndefValue::get(T);
770  uint64_t Offset = 0;
771  for (uint64_t i = 0; i < NumElements; i++) {
772  Value *Indices[2] = {
773  Zero,
774  ConstantInt::get(IdxType, i),
775  };
776  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
777  Name + ".elt");
778  auto *L = IC.Builder.CreateAlignedLoad(
779  AT->getElementType(), Ptr, MinAlign(Align, Offset), Name + ".unpack");
780  AAMDNodes AAMD;
781  LI.getAAMetadata(AAMD);
782  L->setAAMetadata(AAMD);
783  V = IC.Builder.CreateInsertValue(V, L, i);
784  Offset += EltSize;
785  }
786 
787  V->setName(Name);
788  return IC.replaceInstUsesWith(LI, V);
789  }
790 
791  return nullptr;
792 }
793 
794 // If we can determine that all possible objects pointed to by the provided
795 // pointer value are not only dereferenceable but also definitively less than
796 // or equal to the provided maximum size, then return true. Otherwise, return
797 // false. (Constant global values and allocas are examples of objects whose
798 // size can be determined this way.)
799 // FIXME: This should probably live in ValueTracking (or similar).
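// As a small worked example: for 'alloca [4 x i32]' (with the canonical array
// size of 1), the object is exactly 16 bytes on a typical data layout, so a
// query with MaxSize >= 16 can return true, while a pointer of unknown
// provenance makes the walk below return false.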
800 static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
801  const DataLayout &DL) {
802  SmallPtrSet<Value *, 4> Visited;
803  SmallVector<Value *, 4> Worklist(1, V);
804 
805  do {
806  Value *P = Worklist.pop_back_val();
807  P = P->stripPointerCasts();
808 
809  if (!Visited.insert(P).second)
810  continue;
811 
812  if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
813  Worklist.push_back(SI->getTrueValue());
814  Worklist.push_back(SI->getFalseValue());
815  continue;
816  }
817 
818  if (PHINode *PN = dyn_cast<PHINode>(P)) {
819  for (Value *IncValue : PN->incoming_values())
820  Worklist.push_back(IncValue);
821  continue;
822  }
823 
824  if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
825  if (GA->isInterposable())
826  return false;
827  Worklist.push_back(GA->getAliasee());
828  continue;
829  }
830 
831  // If we know how big this object is, and it is less than MaxSize, continue
832  // searching. Otherwise, return false.
833  if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
834  if (!AI->getAllocatedType()->isSized())
835  return false;
836 
837  ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
838  if (!CS)
839  return false;
840 
841  uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
842  // Make sure that, even if the multiplication below would wrap as an
843  // uint64_t, we still do the right thing.
844  if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
845  return false;
846  continue;
847  }
848 
849  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
850  if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
851  return false;
852 
853  uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
854  if (InitSize > MaxSize)
855  return false;
856  continue;
857  }
858 
859  return false;
860  } while (!Worklist.empty());
861 
862  return true;
863 }
864 
865 // If we're indexing into an object of a known size, and the outer index is
866 // not a constant, but having any value but zero would lead to undefined
867 // behavior, replace it with zero.
868 //
869 // For example, if we have:
870 // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
871 // ...
872 // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
873 // ... = load i32* %arrayidx, align 4
874 // Then we know that we can replace %x in the GEP with i64 0.
875 //
876 // FIXME: We could fold any GEP index to zero that would cause UB if it were
877 // not zero. Currently, we only handle the first such index. Also, we could
878 // search through non-zero constant indices if we kept track of the
879 // offsets those indices implied.
880 static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
881                                      Instruction *MemI, unsigned &Idx) {
882  if (GEPI->getNumOperands() < 2)
883  return false;
884 
885  // Find the first non-zero index of a GEP. If all indices are zero, return
886  // one past the last index.
887  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
888  unsigned I = 1;
889  for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
890  Value *V = GEPI->getOperand(I);
891  if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
892  if (CI->isZero())
893  continue;
894 
895  break;
896  }
897 
898  return I;
899  };
900 
901  // Skip through initial 'zero' indices, and find the corresponding pointer
902  // type. See if the next index is not a constant.
903  Idx = FirstNZIdx(GEPI);
904  if (Idx == GEPI->getNumOperands())
905  return false;
906  if (isa<Constant>(GEPI->getOperand(Idx)))
907  return false;
908 
909  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
910   Type *AllocTy =
911       GetElementPtrInst::getIndexedType(GEPI->getSourceElementType(), Ops);
912  if (!AllocTy || !AllocTy->isSized())
913  return false;
914  const DataLayout &DL = IC.getDataLayout();
915  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
916 
917  // If there are more indices after the one we might replace with a zero, make
918  // sure they're all non-negative. If any of them are negative, the overall
919  // address being computed might be before the base address determined by the
920  // first non-zero index.
921  auto IsAllNonNegative = [&]() {
922  for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
923  KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
924  if (Known.isNonNegative())
925  continue;
926  return false;
927  }
928 
929  return true;
930  };
931 
932  // FIXME: If the GEP is not inbounds, and there are extra indices after the
933  // one we'll replace, those could cause the address computation to wrap
934  // (rendering the IsAllNonNegative() check below insufficient). We can do
935  // better, ignoring zero indices (and other indices we can prove small
936  // enough not to wrap).
937  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
938  return false;
939 
940  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
941  // also known to be dereferenceable.
942  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
943  IsAllNonNegative();
944 }
945 
946 // If we're indexing into an object with a variable index for the memory
947 // access, but the object has only one element, we can assume that the index
948 // will always be zero. If we replace the GEP, return it.
949 template <typename T>
950 static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
951                                           T &MemI) {
952  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
953  unsigned Idx;
954  if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
955  Instruction *NewGEPI = GEPI->clone();
956  NewGEPI->setOperand(Idx,
957  ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
958  NewGEPI->insertBefore(GEPI);
959  MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
960  return NewGEPI;
961  }
962  }
963 
964  return nullptr;
965 }
966 
967 static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
968   if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
969     return false;
970 
971  auto *Ptr = SI.getPointerOperand();
972  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
973  Ptr = GEPI->getOperand(0);
974   return (isa<ConstantPointerNull>(Ptr) &&
975           !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
976 }
977 
978 static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
979   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
980  const Value *GEPI0 = GEPI->getOperand(0);
981  if (isa<ConstantPointerNull>(GEPI0) &&
982  !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
983  return true;
984  }
985  if (isa<UndefValue>(Op) ||
986       (isa<ConstantPointerNull>(Op) &&
987        !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
988     return true;
989  return false;
990 }
991 
992 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
993   Value *Op = LI.getOperand(0);
994 
995  // Try to canonicalize the loaded type.
996  if (Instruction *Res = combineLoadToOperationType(*this, LI))
997  return Res;
998 
999  // Attempt to improve the alignment.
1000  unsigned KnownAlign = getOrEnforceKnownAlignment(
1001  Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT);
1002  unsigned LoadAlign = LI.getAlignment();
1003  unsigned EffectiveLoadAlign =
1004  LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
1005 
1006  if (KnownAlign > EffectiveLoadAlign)
1007  LI.setAlignment(KnownAlign);
1008  else if (LoadAlign == 0)
1009  LI.setAlignment(EffectiveLoadAlign);
1010 
1011  // Replace GEP indices if possible.
1012  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
1013  Worklist.Add(NewGEPI);
1014  return &LI;
1015  }
1016 
1017  if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1018  return Res;
1019 
1020  // Do really simple store-to-load forwarding and load CSE, to catch cases
1021  // where there are several consecutive memory accesses to the same location,
1022  // separated by a few arithmetic operations.
1023  BasicBlock::iterator BBI(LI);
1024  bool IsLoadCSE = false;
1025  if (Value *AvailableVal = FindAvailableLoadedValue(
1026  &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1027  if (IsLoadCSE)
1028  combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
1029 
1030  return replaceInstUsesWith(
1031  LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1032  LI.getName() + ".cast"));
1033  }
1034 
1035  // None of the following transforms are legal for volatile/ordered atomic
1036  // loads. Most of them do apply for unordered atomics.
1037  if (!LI.isUnordered()) return nullptr;
1038 
1039  // load(gep null, ...) -> unreachable
1040  // load null/undef -> unreachable
1041  // TODO: Consider a target hook for valid address spaces for this xforms.
1042  if (canSimplifyNullLoadOrGEP(LI, Op)) {
1043  // Insert a new store to null instruction before the load to indicate
1044  // that this code is not reachable. We do this instead of inserting
1045  // an unreachable instruction directly because we cannot modify the
1046  // CFG.
1047     StoreInst *SI = new StoreInst(UndefValue::get(LI.getType()),
1048                                   Constant::getNullValue(Op->getType()), &LI);
1049  SI->setDebugLoc(LI.getDebugLoc());
1050  return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
1051  }
1052 
1053  if (Op->hasOneUse()) {
1054  // Change select and PHI nodes to select values instead of addresses: this
1055   // helps alias analysis out a lot, allows many other simplifications, and
1056  // exposes redundancy in the code.
1057  //
1058  // Note that we cannot do the transformation unless we know that the
1059  // introduced loads cannot trap! Something like this is valid as long as
1060  // the condition is always false: load (select bool %C, int* null, int* %G),
1061  // but it would not be valid if we transformed it to load from null
1062  // unconditionally.
1063  //
1064  if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1065  // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1066  unsigned Align = LI.getAlignment();
1067  if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) &&
1068  isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) {
1069  LoadInst *V1 =
1070  Builder.CreateLoad(LI.getType(), SI->getOperand(1),
1071  SI->getOperand(1)->getName() + ".val");
1072  LoadInst *V2 =
1073  Builder.CreateLoad(LI.getType(), SI->getOperand(2),
1074  SI->getOperand(2)->getName() + ".val");
1075  assert(LI.isUnordered() && "implied by above");
1076  V1->setAlignment(Align);
1077  V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1078  V2->setAlignment(Align);
1079  V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1080  return SelectInst::Create(SI->getCondition(), V1, V2);
1081  }
1082 
1083  // load (select (cond, null, P)) -> load P
1084  if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1085  !NullPointerIsDefined(SI->getFunction(),
1086  LI.getPointerAddressSpace())) {
1087  LI.setOperand(0, SI->getOperand(2));
1088  return &LI;
1089  }
1090 
1091  // load (select (cond, P, null)) -> load P
1092  if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1093  !NullPointerIsDefined(SI->getFunction(),
1094  LI.getPointerAddressSpace())) {
1095  LI.setOperand(0, SI->getOperand(1));
1096  return &LI;
1097  }
1098  }
1099  }
1100  return nullptr;
1101 }
1102 
1103 /// Look for extractelement/insertvalue sequence that acts like a bitcast.
1104 ///
1105 /// \returns underlying value that was "cast", or nullptr otherwise.
1106 ///
1107 /// For example, if we have:
1108 ///
1109 /// %E0 = extractelement <2 x double> %U, i32 0
1110 /// %V0 = insertvalue [2 x double] undef, double %E0, 0
1111 /// %E1 = extractelement <2 x double> %U, i32 1
1112 /// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1113 ///
1114 /// and the layout of a <2 x double> is isomorphic to a [2 x double],
1115 /// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1116 /// Note that %U may contain non-undef values where %V1 has undef.
1117 static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
1118   Value *U = nullptr;
1119  while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1120  auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1121  if (!E)
1122  return nullptr;
1123  auto *W = E->getVectorOperand();
1124  if (!U)
1125  U = W;
1126  else if (U != W)
1127  return nullptr;
1128  auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1129  if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1130  return nullptr;
1131  V = IV->getAggregateOperand();
1132  }
1133  if (!isa<UndefValue>(V) ||!U)
1134  return nullptr;
1135 
1136  auto *UT = cast<VectorType>(U->getType());
1137  auto *VT = V->getType();
1138  // Check that types UT and VT are bitwise isomorphic.
1139  const auto &DL = IC.getDataLayout();
1140  if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1141  return nullptr;
1142  }
1143  if (auto *AT = dyn_cast<ArrayType>(VT)) {
1144  if (AT->getNumElements() != UT->getNumElements())
1145  return nullptr;
1146  } else {
1147  auto *ST = cast<StructType>(VT);
1148  if (ST->getNumElements() != UT->getNumElements())
1149  return nullptr;
1150  for (const auto *EltT : ST->elements()) {
1151  if (EltT != UT->getElementType())
1152  return nullptr;
1153  }
1154  }
1155  return U;
1156 }
1157 
1158 /// Combine stores to match the type of value being stored.
1159 ///
1160 /// The core idea here is that the memory does not have any intrinsic type and
1161 /// where we can we should match the type of a store to the type of value being
1162 /// stored.
1163 ///
1164 /// However, this routine must never change the width of a store or the number of
1165 /// stores as that would introduce a semantic change. This combine is expected to
1166 /// be a semantic no-op which just allows stores to more closely model the types
1167 /// of their incoming values.
1168 ///
1169 /// Currently, we also refuse to change the precise type used for an atomic or
1170 /// volatile store. This is debatable, and might be reasonable to change later.
1171 /// However, it is risky in case some backend or other part of LLVM is relying
1172 /// on the exact type stored to select appropriate atomic operations.
1173 ///
1174 /// \returns true if the store was successfully combined away. This indicates
1175 /// the caller must erase the store instruction. We have to let the caller erase
1176 /// the store instruction as otherwise there is no way to signal whether it was
1177 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
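///
/// Illustrative shape of the bitcast folding handled here (invented IR):
///   %f = bitcast i32 %x to float
///   store float %f, float* %p
/// becomes
///   %p.cast = bitcast float* %p to i32*
///   store i32 %x, i32* %p.cast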
1178 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
1179   // FIXME: We could probably with some care handle both volatile and ordered
1180  // atomic stores here but it isn't clear that this is important.
1181  if (!SI.isUnordered())
1182  return false;
1183 
1184  // swifterror values can't be bitcasted.
1185  if (SI.getPointerOperand()->isSwiftError())
1186  return false;
1187 
1188  Value *V = SI.getValueOperand();
1189 
1190  // Fold away bit casts of the stored value by storing the original type.
1191  if (auto *BC = dyn_cast<BitCastInst>(V)) {
1192  V = BC->getOperand(0);
1193  if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1194  combineStoreToNewValue(IC, SI, V);
1195  return true;
1196  }
1197  }
1198 
1199  if (Value *U = likeBitCastFromVector(IC, V))
1200  if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1201  combineStoreToNewValue(IC, SI, U);
1202  return true;
1203  }
1204 
1205  // FIXME: We should also canonicalize stores of vectors when their elements
1206  // are cast to other types.
1207  return false;
1208 }
1209 
1210 static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
1211   // FIXME: We could probably with some care handle both volatile and atomic
1212  // stores here but it isn't clear that this is important.
1213  if (!SI.isSimple())
1214  return false;
1215 
1216  Value *V = SI.getValueOperand();
1217  Type *T = V->getType();
1218 
1219  if (!T->isAggregateType())
1220  return false;
1221 
1222  if (auto *ST = dyn_cast<StructType>(T)) {
1223     // If the struct has only one element, we unpack.
1224  unsigned Count = ST->getNumElements();
1225  if (Count == 1) {
1226  V = IC.Builder.CreateExtractValue(V, 0);
1227  combineStoreToNewValue(IC, SI, V);
1228  return true;
1229  }
1230 
1231     // We don't want to break stores with padding here as we'd lose
1232  // the knowledge that padding exists for the rest of the pipeline.
1233  const DataLayout &DL = IC.getDataLayout();
1234  auto *SL = DL.getStructLayout(ST);
1235  if (SL->hasPadding())
1236  return false;
1237 
1238  auto Align = SI.getAlignment();
1239     if (!Align)
1240       Align = DL.getABITypeAlignment(ST);
1241 
1242  SmallString<16> EltName = V->getName();
1243  EltName += ".elt";
1244  auto *Addr = SI.getPointerOperand();
1245  SmallString<16> AddrName = Addr->getName();
1246  AddrName += ".repack";
1247 
1248  auto *IdxType = Type::getInt32Ty(ST->getContext());
1249  auto *Zero = ConstantInt::get(IdxType, 0);
1250  for (unsigned i = 0; i < Count; i++) {
1251  Value *Indices[2] = {
1252  Zero,
1253  ConstantInt::get(IdxType, i),
1254  };
1255  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
1256  AddrName);
1257  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1258  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
1259  llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1260  AAMDNodes AAMD;
1261  SI.getAAMetadata(AAMD);
1262  NS->setAAMetadata(AAMD);
1263  }
1264 
1265  return true;
1266  }
1267 
1268  if (auto *AT = dyn_cast<ArrayType>(T)) {
1269     // If the array has only one element, we unpack.
1270  auto NumElements = AT->getNumElements();
1271  if (NumElements == 1) {
1272  V = IC.Builder.CreateExtractValue(V, 0);
1273  combineStoreToNewValue(IC, SI, V);
1274  return true;
1275  }
1276 
1277  // Bail out if the array is too large. Ideally we would like to optimize
1278  // arrays of arbitrary size but this has a terrible impact on compile time.
1279  // The threshold here is chosen arbitrarily, maybe needs a little bit of
1280  // tuning.
1281  if (NumElements > IC.MaxArraySizeForCombine)
1282  return false;
1283 
1284  const DataLayout &DL = IC.getDataLayout();
1285  auto EltSize = DL.getTypeAllocSize(AT->getElementType());
1286  auto Align = SI.getAlignment();
1287  if (!Align)
1288  Align = DL.getABITypeAlignment(T);
1289 
1290  SmallString<16> EltName = V->getName();
1291  EltName += ".elt";
1292  auto *Addr = SI.getPointerOperand();
1293  SmallString<16> AddrName = Addr->getName();
1294  AddrName += ".repack";
1295 
1296  auto *IdxType = Type::getInt64Ty(T->getContext());
1297  auto *Zero = ConstantInt::get(IdxType, 0);
1298 
1299  uint64_t Offset = 0;
1300  for (uint64_t i = 0; i < NumElements; i++) {
1301  Value *Indices[2] = {
1302  Zero,
1303  ConstantInt::get(IdxType, i),
1304  };
1305  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
1306  AddrName);
1307  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1308  auto EltAlign = MinAlign(Align, Offset);
1309  Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1310  AAMDNodes AAMD;
1311  SI.getAAMetadata(AAMD);
1312  NS->setAAMetadata(AAMD);
1313  Offset += EltSize;
1314  }
1315 
1316  return true;
1317  }
1318 
1319  return false;
1320 }
1321 
1322 /// equivalentAddressValues - Test if A and B will obviously have the same
1323 /// value. This includes recognizing that %t0 and %t1 will have the same
1324 /// value in code like this:
1325 /// %t0 = getelementptr \@a, 0, 3
1326 /// store i32 0, i32* %t0
1327 /// %t1 = getelementptr \@a, 0, 3
1328 /// %t2 = load i32* %t1
1329 ///
1330 static bool equivalentAddressValues(Value *A, Value *B) {
1331   // Test if the values are trivially equivalent.
1332  if (A == B) return true;
1333 
1334   // Test if the values come from identical arithmetic instructions.
1335   // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1336   // it's only used to compare two uses within the same basic block, which
1337  // means that they'll always either have the same value or one of them
1338  // will have an undefined value.
1339  if (isa<BinaryOperator>(A) ||
1340  isa<CastInst>(A) ||
1341  isa<PHINode>(A) ||
1342  isa<GetElementPtrInst>(A))
1343  if (Instruction *BI = dyn_cast<Instruction>(B))
1344  if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1345  return true;
1346 
1347  // Otherwise they may not be equivalent.
1348  return false;
1349 }
1350 
1351 /// Converts store (bitcast (load (bitcast (select ...)))) to
1352 /// store (load (select ...)), where select is minmax:
1353 /// select ((cmp load V1, load V2), V1, V2).
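/// A sketch of the shape handled here (invented IR), where %sel is such a
/// minmax select over float pointers:
///   %sel.i = bitcast float* %sel to i32*
///   %v     = load i32, i32* %sel.i
///   %dst.i = bitcast float* %dst to i32*
///   store i32 %v, i32* %dst.i
/// which is rewritten to load and store the original float type:
///   %v.f = load float, float* %sel
///   store float %v.f, float* %dst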
1354 static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
1355                                                 StoreInst &SI) {
1356  // bitcast?
1357  if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
1358  return false;
1359  // load? integer?
1360  Value *LoadAddr;
1361  if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
1362  return false;
1363  auto *LI = cast<LoadInst>(SI.getValueOperand());
1364  if (!LI->getType()->isIntegerTy())
1365  return false;
1366  if (!isMinMaxWithLoads(LoadAddr))
1367  return false;
1368 
1369  if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
1370  auto *SI = dyn_cast<StoreInst>(U);
1371  return SI && SI->getPointerOperand() != LI &&
1372  peekThroughBitcast(SI->getPointerOperand()) != LoadAddr &&
1373  !SI->getPointerOperand()->isSwiftError();
1374  }))
1375  return false;
1376 
1377  IC.Builder.SetInsertPoint(LI);
1378  LoadInst *NewLI = combineLoadToNewType(
1379  IC, *LI, LoadAddr->getType()->getPointerElementType());
1380  // Replace all the stores with stores of the newly loaded value.
1381  for (auto *UI : LI->users()) {
1382  auto *USI = cast<StoreInst>(UI);
1383  IC.Builder.SetInsertPoint(USI);
1384  combineStoreToNewValue(IC, *USI, NewLI);
1385  }
1386  IC.replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
1387  IC.eraseInstFromFunction(*LI);
1388  return true;
1389 }
1390 
1391 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
1392   Value *Val = SI.getOperand(0);
1393  Value *Ptr = SI.getOperand(1);
1394 
1395  // Try to canonicalize the stored type.
1396  if (combineStoreToValueType(*this, SI))
1397  return eraseInstFromFunction(SI);
1398 
1399  // Attempt to improve the alignment.
1400  unsigned KnownAlign = getOrEnforceKnownAlignment(
1401  Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT);
1402  unsigned StoreAlign = SI.getAlignment();
1403  unsigned EffectiveStoreAlign =
1404  StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
1405 
1406  if (KnownAlign > EffectiveStoreAlign)
1407  SI.setAlignment(KnownAlign);
1408  else if (StoreAlign == 0)
1409  SI.setAlignment(EffectiveStoreAlign);
1410 
1411  // Try to canonicalize the stored type.
1412  if (unpackStoreToAggregate(*this, SI))
1413  return eraseInstFromFunction(SI);
1414 
1415  if (removeBitcastsFromLoadStoreOnMinMax(*this, SI))
1416  return eraseInstFromFunction(SI);
1417 
1418  // Replace GEP indices if possible.
1419  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
1420  Worklist.Add(NewGEPI);
1421  return &SI;
1422  }
1423 
1424  // Don't hack volatile/ordered stores.
1425  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1426  if (!SI.isUnordered()) return nullptr;
1427 
1428  // If the RHS is an alloca with a single use, zapify the store, making the
1429  // alloca dead.
1430  if (Ptr->hasOneUse()) {
1431  if (isa<AllocaInst>(Ptr))
1432  return eraseInstFromFunction(SI);
1433  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1434  if (isa<AllocaInst>(GEP->getOperand(0))) {
1435  if (GEP->getOperand(0)->hasOneUse())
1436  return eraseInstFromFunction(SI);
1437  }
1438  }
1439  }
1440 
1441  // Do really simple DSE, to catch cases where there are several consecutive
1442  // stores to the same location, separated by a few arithmetic operations. This
1443  // situation often occurs with bitfield accesses.
1444  BasicBlock::iterator BBI(SI);
1445  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1446  --ScanInsts) {
1447  --BBI;
1448  // Don't count debug info directives, lest they affect codegen,
1449  // and we skip pointer-to-pointer bitcasts, which are NOPs.
1450  if (isa<DbgInfoIntrinsic>(BBI) ||
1451  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1452  ScanInsts++;
1453  continue;
1454  }
1455 
1456  if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1457  // Prev store isn't volatile, and stores to the same location?
1458  if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
1459  SI.getOperand(1))) {
1460  ++NumDeadStore;
1461  ++BBI;
1462  eraseInstFromFunction(*PrevSI);
1463  continue;
1464  }
1465  break;
1466  }
1467 
1468  // If this is a load, we have to stop. However, if the loaded value is from
1469  // the pointer we're loading and is producing the pointer we're storing,
1470  // then *this* store is dead (X = load P; store X -> P).
1471  if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1472  if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1473  assert(SI.isUnordered() && "can't eliminate ordering operation");
1474  return eraseInstFromFunction(SI);
1475  }
1476 
1477  // Otherwise, this is a load from some other location. Stores before it
1478  // may not be dead.
1479  break;
1480  }
1481 
1482  // Don't skip over loads, throws or things that can modify memory.
1483  if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1484  break;
1485  }
1486 
1487  // store X, null -> turns into 'unreachable' in SimplifyCFG
1488  // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1489  if (canSimplifyNullStoreOrGEP(SI)) {
1490  if (!isa<UndefValue>(Val)) {
1491  SI.setOperand(0, UndefValue::get(Val->getType()));
1492  if (Instruction *U = dyn_cast<Instruction>(Val))
1493  Worklist.Add(U); // Dropped a use.
1494  }
1495  return nullptr; // Do not modify these!
1496  }
1497 
1498  // store undef, Ptr -> noop
1499  if (isa<UndefValue>(Val))
1500  return eraseInstFromFunction(SI);
1501 
1502  // If this store is the second-to-last instruction in the basic block
1503  // (excluding debug info and bitcasts of pointers) and if the block ends with
1504  // an unconditional branch, try to move the store to the successor block.
1505  BBI = SI.getIterator();
1506  do {
1507  ++BBI;
1508  } while (isa<DbgInfoIntrinsic>(BBI) ||
1509  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
1510 
1511  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
1512  if (BI->isUnconditional())
1513  mergeStoreIntoSuccessor(SI);
1514 
1515  return nullptr;
1516 }
1517 
1518 /// Try to transform:
1519 /// if () { *P = v1; } else { *P = v2 }
1520 /// or:
1521 /// *P = v1; if () { *P = v2; }
1522 /// into a phi node with a store in the successor.
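/// Conceptually (made-up IR), for the if/then/else form:
///   then:                          else:
///     store i32 1, i32* %p           store i32 2, i32* %p
///     br label %merge                br label %merge
/// becomes, in %merge:
///   %storemerge = phi i32 [ 1, %then ], [ 2, %else ]
///   store i32 %storemerge, i32* %p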
1523 bool InstCombiner::mergeStoreIntoSuccessor(StoreInst &SI) {
1524  assert(SI.isUnordered() &&
1525  "This code has not been audited for volatile or ordered store case.");
1526 
1527  // Check if the successor block has exactly 2 incoming edges.
1528  BasicBlock *StoreBB = SI.getParent();
1529  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1530  if (!DestBB->hasNPredecessors(2))
1531  return false;
1532 
1533  // Capture the other block (the block that doesn't contain our store).
1534  pred_iterator PredIter = pred_begin(DestBB);
1535  if (*PredIter == StoreBB)
1536  ++PredIter;
1537  BasicBlock *OtherBB = *PredIter;
1538 
1539  // Bail out if all of the relevant blocks aren't distinct. This can happen,
1540  // for example, if SI is in an infinite loop.
1541  if (StoreBB == DestBB || OtherBB == DestBB)
1542  return false;
1543 
1544  // Verify that the other block ends in a branch and is not otherwise empty.
1545  BasicBlock::iterator BBI(OtherBB->getTerminator());
1546  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1547  if (!OtherBr || BBI == OtherBB->begin())
1548  return false;
1549 
1550  // If the other block ends in an unconditional branch, check for the 'if then
1551  // else' case. There is an instruction before the branch.
1552  StoreInst *OtherStore = nullptr;
1553  if (OtherBr->isUnconditional()) {
1554  --BBI;
1555  // Skip over debugging info.
1556  while (isa<DbgInfoIntrinsic>(BBI) ||
1557  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1558  if (BBI==OtherBB->begin())
1559  return false;
1560  --BBI;
1561  }
1562  // If this isn't a store, isn't a store to the same location, or is not the
1563  // right kind of store, bail out.
1564  OtherStore = dyn_cast<StoreInst>(BBI);
1565  if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
1566  !SI.isSameOperationAs(OtherStore))
1567  return false;
1568  } else {
1569  // Otherwise, the other block ended with a conditional branch. If one of the
1570  // destinations is StoreBB, then we have the if/then case.
1571  if (OtherBr->getSuccessor(0) != StoreBB &&
1572  OtherBr->getSuccessor(1) != StoreBB)
1573  return false;
1574 
1575  // Okay, we know that OtherBr now goes to DestBB and StoreBB, so this is an
1576  // if/then triangle. See if there is a store to the same ptr as SI that
1577  // lives in OtherBB.
1578  for (;; --BBI) {
1579  // Check to see if we find the matching store.
1580  if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
1581  if (OtherStore->getOperand(1) != SI.getOperand(1) ||
1582  !SI.isSameOperationAs(OtherStore))
1583  return false;
1584  break;
1585  }
1586  // If we find something that may be using or overwriting the stored
1587  // value, or if we run out of instructions, we can't do the transform.
1588  if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1589  BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1590  return false;
1591  }
1592 
1593  // In order to eliminate the store in OtherBB, we have to make sure nothing
1594  // reads or overwrites the stored value in StoreBB.
1595  for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1596  // FIXME: This should really be AA driven.
1597  if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1598  return false;
1599  }
1600  }
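  // Illustrative sketch of the if/then triangle handled in the 'else' branch
  // above (editorial addition; names are made up):
  //   other:   store i32 1, i32* %p
  //            br i1 %c, label %storebb, label %merge
  //   storebb: store i32 2, i32* %p
  //            br label %merge
  // Both stores are candidates for merging into a single phi-fed store in
  // %merge, provided nothing in between reads or clobbers %p.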
1601 
1602  // Insert a PHI node now if we need it.
1603  Value *MergedVal = OtherStore->getOperand(0);
1604  // The debug locations of the original instructions might differ. Merge them.
1605  DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
1606  OtherStore->getDebugLoc());
1607  if (MergedVal != SI.getOperand(0)) {
1608  PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
1609  PN->addIncoming(SI.getOperand(0), SI.getParent());
1610  PN->addIncoming(OtherStore->getOperand(0), OtherBB);
1611  MergedVal = InsertNewInstBefore(PN, DestBB->front());
1612  PN->setDebugLoc(MergedLoc);
1613  }
1614 
1615  // Advance to a place where it is safe to insert the new store and insert it.
1616  BBI = DestBB->getFirstInsertionPt();
1617  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
1618  SI.isVolatile(), SI.getAlignment(),
1619  SI.getOrdering(), SI.getSyncScopeID());
1620  InsertNewInstBefore(NewSI, *BBI);
1621  NewSI->setDebugLoc(MergedLoc);
1622 
1623  // If the two stores had AA tags, merge them.
1624  AAMDNodes AATags;
1625  SI.getAAMetadata(AATags);
1626  if (AATags) {
1627  OtherStore->getAAMetadata(AATags, /* Merge = */ true);
1628  NewSI->setAAMetadata(AATags);
1629  }
1630 
1631  // Nuke the old stores.
1632  eraseInstFromFunction(SI);
1633  eraseInstFromFunction(*OtherStore);
1634  return true;
1635 }