LLVM 20.0.0git
InstCombineLoadStoreAlloca.cpp
1//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for load, store and alloca.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/MapVector.h"
15#include "llvm/ADT/SetOperations.h"
16#include "llvm/ADT/SmallString.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/Loads.h"
20#include "llvm/IR/DataLayout.h"
21#include "llvm/IR/DebugInfoMetadata.h"
22#include "llvm/IR/IntrinsicInst.h"
23#include "llvm/IR/LLVMContext.h"
24#include "llvm/IR/PatternMatch.h"
25#include "llvm/Transforms/InstCombine/InstCombiner.h"
26#include "llvm/Transforms/Utils/Local.h"
27using namespace llvm;
28using namespace PatternMatch;
29
30#define DEBUG_TYPE "instcombine"
31
32STATISTIC(NumDeadStore, "Number of dead stores eliminated");
33STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
34
35static cl::opt<unsigned> MaxCopiedFromConstantUsers(
36 "instcombine-max-copied-from-constant-users", cl::init(300),
37 cl::desc("Maximum users to visit in copy from constant transform"),
38 cl::Hidden);
39
40/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
41/// pointer to an alloca. Ignore any reads of the pointer, return false if we
42/// see any stores or other unknown uses. If we see pointer arithmetic, keep
43/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
44/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
45/// the alloca, and if the source pointer is a pointer to a constant memory
46/// location, we can optimize this.
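/// As a rough illustration (schematic IR with invented names, not a real test
/// case), the pattern this helper looks for is:
///
///   @g = private unnamed_addr constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
///   ...
///   %a = alloca [4 x i32]
///   call void @llvm.memcpy.p0.p0.i64(ptr %a, ptr @g, i64 16, i1 false)
///   %p = getelementptr inbounds [4 x i32], ptr %a, i64 0, i64 2
///   %v = load i32, ptr %p
///
/// Every use of %a besides the copy is a read (or a lifetime marker), so loads
/// of %a can be redirected to @g and the copy removed.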
47static bool
48isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V,
49 MemTransferInst *&TheCopy,
50 SmallVectorImpl<Instruction *> &ToDelete) {
51 // We track lifetime intrinsics as we encounter them. If we decide to go
52 // ahead and replace the value with the memory location, this lets the caller
53 // quickly eliminate the markers.
54
55 using ValueAndIsOffset = PointerIntPair<Value *, 1, bool>;
56 SmallVector<ValueAndIsOffset, 32> Worklist;
57 SmallPtrSet<ValueAndIsOffset, 32> Visited;
58 Worklist.emplace_back(V, false);
59 while (!Worklist.empty()) {
60 ValueAndIsOffset Elem = Worklist.pop_back_val();
61 if (!Visited.insert(Elem).second)
62 continue;
63 if (Visited.size() > MaxCopiedFromConstantUsers)
64 return false;
65
66 const auto [Value, IsOffset] = Elem;
67 for (auto &U : Value->uses()) {
68 auto *I = cast<Instruction>(U.getUser());
69
70 if (auto *LI = dyn_cast<LoadInst>(I)) {
71 // Ignore simple (non-volatile, non-atomic) loads; they are always ok.
72 if (!LI->isSimple()) return false;
73 continue;
74 }
75
76 if (isa<PHINode, SelectInst>(I)) {
77 // We set IsOffset=true, to forbid the memcpy from occurring after the
78 // phi: If one of the phi operands is not based on the alloca, we
79 // would incorrectly omit a write.
80 Worklist.emplace_back(I, true);
81 continue;
82 }
83 if (isa<BitCastInst, AddrSpaceCastInst>(I)) {
84 // If uses of the bitcast are ok, we are ok.
85 Worklist.emplace_back(I, IsOffset);
86 continue;
87 }
88 if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
89 // If the GEP has all zero indices, it doesn't offset the pointer;
90 // otherwise it does.
91 Worklist.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
92 continue;
93 }
94
95 if (auto *Call = dyn_cast<CallBase>(I)) {
96 // If this is the function being called then we treat it like a load and
97 // ignore it.
98 if (Call->isCallee(&U))
99 continue;
100
101 unsigned DataOpNo = Call->getDataOperandNo(&U);
102 bool IsArgOperand = Call->isArgOperand(&U);
103
104 // Inalloca arguments are clobbered by the call.
105 if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
106 return false;
107
108 // If this call site doesn't modify the memory, then we know it is just
109 // a load (but one that potentially returns the value itself), so we can
110 // ignore it if we know that the value isn't captured.
111 bool NoCapture = Call->doesNotCapture(DataOpNo);
112 if ((Call->onlyReadsMemory() && (Call->use_empty() || NoCapture)) ||
113 (Call->onlyReadsMemory(DataOpNo) && NoCapture))
114 continue;
115 }
116
117 // Lifetime intrinsics can be handled by the caller.
118 if (I->isLifetimeStartOrEnd()) {
119 assert(I->use_empty() && "Lifetime markers have no result to use!");
120 ToDelete.push_back(I);
121 continue;
122 }
123
124 // If this isn't our memcpy/memmove, reject it as something we can't
125 // handle.
126 MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
127 if (!MI)
128 return false;
129
130 // If the transfer is volatile, reject it.
131 if (MI->isVolatile())
132 return false;
133
134 // If the transfer is using the alloca as a source of the transfer, then
135 // ignore it since it is a load (unless the transfer is volatile).
136 if (U.getOperandNo() == 1)
137 continue;
138
139 // If we already have seen a copy, reject the second one.
140 if (TheCopy) return false;
141
142 // If the pointer has been offset from the start of the alloca, we can't
143 // safely handle this.
144 if (IsOffset) return false;
145
146 // If the memintrinsic isn't using the alloca as the dest, reject it.
147 if (U.getOperandNo() != 0) return false;
148
149 // If the source of the memcpy/move is not constant, reject it.
150 if (isModSet(AA->getModRefInfoMask(MI->getSource())))
151 return false;
152
153 // Otherwise, the transform is safe. Remember the copy instruction.
154 TheCopy = MI;
155 }
156 }
157 return true;
158}
159
160/// isOnlyCopiedFromConstantMemory - Return the memcpy/memmove that copies a
161/// constant memory location into the specified alloca, if the alloca is only
162/// modified by that copy. If we can prove this, we can replace any uses of
163/// the alloca with uses of the memory location directly.
164static MemTransferInst *
165isOnlyCopiedFromConstantMemory(AAResults *AA,
166 AllocaInst *AI,
167 SmallVectorImpl<Instruction *> &ToDelete) {
168 MemTransferInst *TheCopy = nullptr;
169 if (isOnlyCopiedFromConstantMemory(AA, AI, TheCopy, ToDelete))
170 return TheCopy;
171 return nullptr;
172}
173
174/// Returns true if V is dereferenceable for the size of the alloca.
175static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
176 const DataLayout &DL) {
177 if (AI->isArrayAllocation())
178 return false;
179 uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
180 if (!AllocaSize)
181 return false;
182 return isDereferenceableAndAlignedPointer(V, Align(1),
183 APInt(64, AllocaSize), DL);
184}
185
186static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
187 AllocaInst &AI, DominatorTree &DT) {
188 // Check for array size of 1 (scalar allocation).
189 if (!AI.isArrayAllocation()) {
190 // i32 1 is the canonical array size for scalar allocations.
191 if (AI.getArraySize()->getType()->isIntegerTy(32))
192 return nullptr;
193
194 // Canonicalize it.
195 return IC.replaceOperand(AI, 0, IC.Builder.getInt32(1));
196 }
197
198 // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
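  // For example (schematic IR, names invented):
  //   %a = alloca i32, i32 8, align 4   -->   %a = alloca [8 x i32], align 4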
199 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
200 if (C->getValue().getActiveBits() <= 64) {
201 Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
202 AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
203 nullptr, AI.getName());
204 New->setAlignment(AI.getAlign());
205 New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
206
207 replaceAllDbgUsesWith(AI, *New, *New, DT);
208 return IC.replaceInstUsesWith(AI, New);
209 }
210 }
211
212 if (isa<UndefValue>(AI.getArraySize()))
213 return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
214
215 // Ensure that the alloca array size argument has type equal to the offset
216 // size of the alloca() pointer, which, in the typical case, is intptr_t,
217 // so that any casting is exposed early.
218 Type *PtrIdxTy = IC.getDataLayout().getIndexType(AI.getType());
219 if (AI.getArraySize()->getType() != PtrIdxTy) {
220 Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), PtrIdxTy, false);
221 return IC.replaceOperand(AI, 0, V);
222 }
223
224 return nullptr;
225}
226
227namespace {
228// If I and V are pointers in different address spaces, it is not allowed to
229// use replaceAllUsesWith since I and V have different types. A
230// non-target-specific transformation should not use addrspacecast on V since
231// the two address spaces may be disjoint depending on the target.
232//
233// This class chases down uses of the old pointer until reaching the load
234// instructions, then replaces the old pointer in the load instructions with
235// the new pointer. If during the chase it sees a bitcast or GEP, it creates a
236// new bitcast or GEP with the new pointer and uses it in the load
237// instruction.
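//
// As a rough illustration (schematic IR; the concrete address spaces are just
// an example):
//
//   %a = alloca [4 x i32], align 4, addrspace(5)
//   call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %a,
//                                    ptr addrspace(4) @g, i64 16, i1 false)
//   %p = getelementptr inbounds [4 x i32], ptr addrspace(5) %a, i64 0, i64 1
//   %v = load i32, ptr addrspace(5) %p
//
// Because @g lives in a different address space than %a, the alloca cannot
// simply be RAUW'd with @g; instead the GEP and load are recreated on top of
// the addrspace(4) pointer.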
238class PointerReplacer {
239public:
240 PointerReplacer(InstCombinerImpl &IC, Instruction &Root, unsigned SrcAS)
241 : IC(IC), Root(Root), FromAS(SrcAS) {}
242
243 bool collectUsers();
244 void replacePointer(Value *V);
245
246private:
247 bool collectUsersRecursive(Instruction &I);
248 void replace(Instruction *I);
249 Value *getReplacement(Value *I);
250 bool isAvailable(Instruction *I) const {
251 return I == &Root || Worklist.contains(I);
252 }
253
254 bool isEqualOrValidAddrSpaceCast(const Instruction *I,
255 unsigned FromAS) const {
256 const auto *ASC = dyn_cast<AddrSpaceCastInst>(I);
257 if (!ASC)
258 return false;
259 unsigned ToAS = ASC->getDestAddressSpace();
260 return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
261 }
262
263 SmallPtrSet<Instruction *, 32> ValuesToRevisit;
264 SmallSetVector<Instruction *, 4> Worklist;
265 MapVector<Value *, Value *> WorkMap;
266 InstCombinerImpl &IC;
267 Instruction &Root;
268 unsigned FromAS;
269};
270} // end anonymous namespace
271
272bool PointerReplacer::collectUsers() {
273 if (!collectUsersRecursive(Root))
274 return false;
275
276 // Ensure that all outstanding (indirect) users of I
277 // are inserted into the Worklist. Return false
278 // otherwise.
279 return llvm::set_is_subset(ValuesToRevisit, Worklist);
280}
281
282bool PointerReplacer::collectUsersRecursive(Instruction &I) {
283 for (auto *U : I.users()) {
284 auto *Inst = cast<Instruction>(&*U);
285 if (auto *Load = dyn_cast<LoadInst>(Inst)) {
286 if (Load->isVolatile())
287 return false;
288 Worklist.insert(Load);
289 } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
290 // All incoming values must be instructions for replaceability.
291 if (any_of(PHI->incoming_values(),
292 [](Value *V) { return !isa<Instruction>(V); }))
293 return false;
294
295 // If at least one incoming value of the PHI is not in Worklist,
296 // store the PHI for revisiting and skip this iteration of the
297 // loop.
298 if (any_of(PHI->incoming_values(), [this](Value *V) {
299 return !isAvailable(cast<Instruction>(V));
300 })) {
301 ValuesToRevisit.insert(Inst);
302 continue;
303 }
304
305 Worklist.insert(PHI);
306 if (!collectUsersRecursive(*PHI))
307 return false;
308 } else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
309 if (!isa<Instruction>(SI->getTrueValue()) ||
310 !isa<Instruction>(SI->getFalseValue()))
311 return false;
312
313 if (!isAvailable(cast<Instruction>(SI->getTrueValue())) ||
314 !isAvailable(cast<Instruction>(SI->getFalseValue()))) {
315 ValuesToRevisit.insert(Inst);
316 continue;
317 }
318 Worklist.insert(SI);
319 if (!collectUsersRecursive(*SI))
320 return false;
321 } else if (isa<GetElementPtrInst>(Inst)) {
322 Worklist.insert(Inst);
323 if (!collectUsersRecursive(*Inst))
324 return false;
325 } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
326 if (MI->isVolatile())
327 return false;
328 Worklist.insert(Inst);
329 } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
330 Worklist.insert(Inst);
331 if (!collectUsersRecursive(*Inst))
332 return false;
333 } else if (Inst->isLifetimeStartOrEnd()) {
334 continue;
335 } else {
336 // TODO: For arbitrary uses with address space mismatches, should we check
337 // if we can introduce a valid addrspacecast?
338 LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
339 return false;
340 }
341 }
342
343 return true;
344}
345
346Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }
347
348void PointerReplacer::replace(Instruction *I) {
349 if (getReplacement(I))
350 return;
351
352 if (auto *LT = dyn_cast<LoadInst>(I)) {
353 auto *V = getReplacement(LT->getPointerOperand());
354 assert(V && "Operand not replaced");
355 auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
356 LT->getAlign(), LT->getOrdering(),
357 LT->getSyncScopeID());
358 NewI->takeName(LT);
359 copyMetadataForLoad(*NewI, *LT);
360
361 IC.InsertNewInstWith(NewI, LT->getIterator());
362 IC.replaceInstUsesWith(*LT, NewI);
363 WorkMap[LT] = NewI;
364 } else if (auto *PHI = dyn_cast<PHINode>(I)) {
365 Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
366 auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
367 PHI->getName(), PHI->getIterator());
368 for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I)
369 NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)),
370 PHI->getIncomingBlock(I));
371 WorkMap[PHI] = NewPHI;
372 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
373 auto *V = getReplacement(GEP->getPointerOperand());
374 assert(V && "Operand not replaced");
375 SmallVector<Value *, 8> Indices(GEP->indices());
376 auto *NewI =
377 GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
378 IC.InsertNewInstWith(NewI, GEP->getIterator());
379 NewI->takeName(GEP);
380 NewI->setNoWrapFlags(GEP->getNoWrapFlags());
381 WorkMap[GEP] = NewI;
382 } else if (auto *SI = dyn_cast<SelectInst>(I)) {
383 Value *TrueValue = SI->getTrueValue();
384 Value *FalseValue = SI->getFalseValue();
385 if (Value *Replacement = getReplacement(TrueValue))
386 TrueValue = Replacement;
387 if (Value *Replacement = getReplacement(FalseValue))
388 FalseValue = Replacement;
389 auto *NewSI = SelectInst::Create(SI->getCondition(), TrueValue, FalseValue,
390 SI->getName(), nullptr, SI);
391 IC.InsertNewInstWith(NewSI, SI->getIterator());
392 NewSI->takeName(SI);
393 WorkMap[SI] = NewSI;
394 } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
395 auto *DestV = MemCpy->getRawDest();
396 auto *SrcV = MemCpy->getRawSource();
397
398 if (auto *DestReplace = getReplacement(DestV))
399 DestV = DestReplace;
400 if (auto *SrcReplace = getReplacement(SrcV))
401 SrcV = SrcReplace;
402
403 IC.Builder.SetInsertPoint(MemCpy);
404 auto *NewI = IC.Builder.CreateMemTransferInst(
405 MemCpy->getIntrinsicID(), DestV, MemCpy->getDestAlign(), SrcV,
406 MemCpy->getSourceAlign(), MemCpy->getLength(), MemCpy->isVolatile());
407 AAMDNodes AAMD = MemCpy->getAAMetadata();
408 if (AAMD)
409 NewI->setAAMetadata(AAMD);
410
411 IC.eraseInstFromFunction(*MemCpy);
412 WorkMap[MemCpy] = NewI;
413 } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(I)) {
414 auto *V = getReplacement(ASC->getPointerOperand());
415 assert(V && "Operand not replaced");
416 assert(isEqualOrValidAddrSpaceCast(
417 ASC, V->getType()->getPointerAddressSpace()) &&
418 "Invalid address space cast!");
419
420 if (V->getType()->getPointerAddressSpace() !=
421 ASC->getType()->getPointerAddressSpace()) {
422 auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), "");
423 NewI->takeName(ASC);
424 IC.InsertNewInstWith(NewI, ASC->getIterator());
425 WorkMap[ASC] = NewI;
426 } else {
427 WorkMap[ASC] = V;
428 }
429
430 } else {
431 llvm_unreachable("should never reach here");
432 }
433}
434
435void PointerReplacer::replacePointer(Value *V) {
436#ifndef NDEBUG
437 auto *PT = cast<PointerType>(Root.getType());
438 auto *NT = cast<PointerType>(V->getType());
439 assert(PT != NT && "Invalid usage");
440#endif
441 WorkMap[&Root] = V;
442
443 for (Instruction *Workitem : Worklist)
444 replace(Workitem);
445}
446
447Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
448 if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
449 return I;
450
451 if (AI.getAllocatedType()->isSized()) {
452 // Move all allocas of zero-byte objects to the entry block and merge them
453 // together. Note that we only do this for allocas, because malloc should
454 // allocate and return a unique pointer, even for a zero-byte allocation.
455 if (DL.getTypeAllocSize(AI.getAllocatedType()).getKnownMinValue() == 0) {
456 // For a zero sized alloca there is no point in doing an array allocation.
457 // This is helpful if the array size is a complicated expression not used
458 // elsewhere.
459 if (AI.isArrayAllocation())
460 return replaceOperand(AI, 0,
461 ConstantInt::get(AI.getArraySize()->getType(), 1));
462
463 // Get the first instruction in the entry block.
464 BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
465 Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
466 if (FirstInst != &AI) {
467 // If the entry block doesn't start with a zero-size alloca then move
468 // this one to the start of the entry block. There is no problem with
469 // dominance as the array size was forced to a constant earlier already.
470 AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
471 if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
472 DL.getTypeAllocSize(EntryAI->getAllocatedType())
473 .getKnownMinValue() != 0) {
474 AI.moveBefore(FirstInst);
475 return &AI;
476 }
477
478 // Replace this zero-sized alloca with the one at the start of the entry
479 // block after ensuring that the address will be aligned enough for both
480 // types.
481 const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
482 EntryAI->setAlignment(MaxAlign);
483 return replaceInstUsesWith(AI, EntryAI);
484 }
485 }
486 }
487
488 // Check to see if this allocation is only modified by a memcpy/memmove from
489 // a memory location whose alignment is equal to or exceeds that of the
490 // allocation. If this is the case, we can change all users to use the
491 // constant memory location instead. This is commonly produced by the CFE by
492 // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
493 // is only subsequently read.
494 SmallVector<Instruction *, 4> ToDelete;
495 if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
496 Value *TheSrc = Copy->getSource();
497 Align AllocaAlign = AI.getAlign();
498 Align SourceAlign = getOrEnforceKnownAlignment(
499 TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
500 if (AllocaAlign <= SourceAlign &&
501 isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
502 !isa<Instruction>(TheSrc)) {
503 // FIXME: Can we sink instructions without violating dominance when TheSrc
504 // is an instruction instead of a constant or argument?
505 LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
506 LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
507 unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
508 if (AI.getAddressSpace() == SrcAddrSpace) {
509 for (Instruction *Delete : ToDelete)
510 eraseInstFromFunction(*Delete);
511
512 Instruction *NewI = replaceInstUsesWith(AI, TheSrc);
513 eraseInstFromFunction(*Copy);
514 ++NumGlobalCopies;
515 return NewI;
516 }
517
518 PointerReplacer PtrReplacer(*this, AI, SrcAddrSpace);
519 if (PtrReplacer.collectUsers()) {
520 for (Instruction *Delete : ToDelete)
521 eraseInstFromFunction(*Delete);
522
523 PtrReplacer.replacePointer(TheSrc);
524 ++NumGlobalCopies;
525 }
526 }
527 }
528
529 // Finally, use the generic allocation site handler to aggressively remove
530 // unused allocas.
531 return visitAllocSite(AI);
532}
533
534// Are we allowed to form an atomic load or store of this type?
535static bool isSupportedAtomicType(Type *Ty) {
536 return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
537}
538
539/// Helper to combine a load to a new type.
540///
541/// This just does the work of combining a load to a new type. It handles
542/// metadata, etc., and returns the new instruction. The \c NewTy should be the
543/// loaded *value* type. This will convert it to a pointer, cast the operand to
544/// that pointer type, load it, etc.
545///
546/// Note that this will create all of the instructions with whatever insert
547/// point the \c InstCombinerImpl currently is using.
548LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy,
549 const Twine &Suffix) {
550 assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
551 "can't fold an atomic load to requested type");
552
553 LoadInst *NewLoad =
554 Builder.CreateAlignedLoad(NewTy, LI.getPointerOperand(), LI.getAlign(),
555 LI.isVolatile(), LI.getName() + Suffix);
556 NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
557 copyMetadataForLoad(*NewLoad, LI);
558 return NewLoad;
559}
560
561/// Combine a store to a new type.
562///
563/// Returns the newly created store instruction.
564static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
565 Value *V) {
566 assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
567 "can't fold an atomic store of requested type");
568
569 Value *Ptr = SI.getPointerOperand();
570 SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
571 SI.getAllMetadata(MD);
572
573 StoreInst *NewStore =
574 IC.Builder.CreateAlignedStore(V, Ptr, SI.getAlign(), SI.isVolatile());
575 NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
576 for (const auto &MDPair : MD) {
577 unsigned ID = MDPair.first;
578 MDNode *N = MDPair.second;
579 // Note, essentially every kind of metadata should be preserved here! This
580 // routine is supposed to clone a store instruction changing *only its
581 // type*. The only metadata it makes sense to drop is metadata which is
582 // invalidated when the pointer type changes. This should essentially
583 // never be the case in LLVM, but we explicitly switch over only known
584 // metadata to be conservatively correct. If you are adding metadata to
585 // LLVM which pertains to stores, you almost certainly want to add it
586 // here.
587 switch (ID) {
588 case LLVMContext::MD_dbg:
589 case LLVMContext::MD_DIAssignID:
590 case LLVMContext::MD_tbaa:
591 case LLVMContext::MD_prof:
592 case LLVMContext::MD_fpmath:
593 case LLVMContext::MD_tbaa_struct:
594 case LLVMContext::MD_alias_scope:
595 case LLVMContext::MD_noalias:
596 case LLVMContext::MD_nontemporal:
597 case LLVMContext::MD_mem_parallel_loop_access:
598 case LLVMContext::MD_access_group:
599 // All of these directly apply.
600 NewStore->setMetadata(ID, N);
601 break;
602 case LLVMContext::MD_invariant_load:
603 case LLVMContext::MD_nonnull:
604 case LLVMContext::MD_noundef:
605 case LLVMContext::MD_range:
606 case LLVMContext::MD_align:
607 case LLVMContext::MD_dereferenceable:
608 case LLVMContext::MD_dereferenceable_or_null:
609 // These don't apply for stores.
610 break;
611 }
612 }
613
614 return NewStore;
615}
616
617/// Combine loads to match the type of their uses' value after looking
618/// through intervening bitcasts.
619///
620/// The core idea here is that if the result of a load is used in an operation,
621/// we should load the type most conducive to that operation. For example, when
622/// loading an integer and converting that immediately to a pointer, we should
623/// instead directly load a pointer.
624///
625/// However, this routine must never change the width of a load or the number of
626/// loads as that would introduce a semantic change. This combine is expected to
627/// be a semantic no-op which just allows loads to more closely model the types
628/// of their consuming operations.
629///
630/// Currently, we also refuse to change the precise type used for an atomic load
631/// or a volatile load. This is debatable, and might be reasonable to change
632/// later. However, it is risky in case some backend or other part of LLVM is
633/// relying on the exact type loaded to select appropriate atomic operations.
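///
/// As a hedged sketch of the rewrite this enables (schematic IR, names
/// invented):
///
///   %x = load i32, ptr %p
///   %f = bitcast i32 %x to float
///
/// becomes
///
///   %f = load float, ptr %p
///
/// while pointer<->integer round trips (inttoptr/ptrtoint) are deliberately
/// left alone to avoid type punning.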
634static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
635 LoadInst &Load) {
636 // FIXME: We could probably with some care handle both volatile and ordered
637 // atomic loads here but it isn't clear that this is important.
638 if (!Load.isUnordered())
639 return nullptr;
640
641 if (Load.use_empty())
642 return nullptr;
643
644 // swifterror values can't be bitcasted.
645 if (Load.getPointerOperand()->isSwiftError())
646 return nullptr;
647
648 // Fold away bit casts of the loaded value by loading the desired type.
649 // Note that we should not do this for pointer<->integer casts,
650 // because that would result in type punning.
651 if (Load.hasOneUse()) {
652 // Don't transform when the type is x86_amx; it keeps the pass that
653 // lowers the x86_amx type happy.
654 Type *LoadTy = Load.getType();
655 if (auto *BC = dyn_cast<BitCastInst>(Load.user_back())) {
656 assert(!LoadTy->isX86_AMXTy() && "Load from x86_amx* should not happen!");
657 if (BC->getType()->isX86_AMXTy())
658 return nullptr;
659 }
660
661 if (auto *CastUser = dyn_cast<CastInst>(Load.user_back())) {
662 Type *DestTy = CastUser->getDestTy();
663 if (CastUser->isNoopCast(IC.getDataLayout()) &&
664 LoadTy->isPtrOrPtrVectorTy() == DestTy->isPtrOrPtrVectorTy() &&
665 (!Load.isAtomic() || isSupportedAtomicType(DestTy))) {
666 LoadInst *NewLoad = IC.combineLoadToNewType(Load, DestTy);
667 CastUser->replaceAllUsesWith(NewLoad);
668 IC.eraseInstFromFunction(*CastUser);
669 return &Load;
670 }
671 }
672 }
673
674 // FIXME: We should also canonicalize loads of vectors when their elements are
675 // cast to other types.
676 return nullptr;
677}
678
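// unpackLoadToAggregate splits a load of a small aggregate into per-element
// loads stitched together with insertvalue. Roughly (schematic IR, names
// invented):
//
//   %s = load { i32, i32 }, ptr %p
//
// becomes
//
//   %s.elt = getelementptr inbounds { i32, i32 }, ptr %p, i32 0, i32 0
//   %s.unpack = load i32, ptr %s.elt
//   %0 = insertvalue { i32, i32 } poison, i32 %s.unpack, 0
//   %s.elt1 = getelementptr inbounds { i32, i32 }, ptr %p, i32 0, i32 1
//   %s.unpack2 = load i32, ptr %s.elt1
//   %s1 = insertvalue { i32, i32 } %0, i32 %s.unpack2, 1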
679static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
680 // FIXME: We could probably with some care handle both volatile and atomic
681 // stores here but it isn't clear that this is important.
682 if (!LI.isSimple())
683 return nullptr;
684
685 Type *T = LI.getType();
686 if (!T->isAggregateType())
687 return nullptr;
688
689 StringRef Name = LI.getName();
690
691 if (auto *ST = dyn_cast<StructType>(T)) {
692 // If the struct only has one element, we unpack.
693 auto NumElements = ST->getNumElements();
694 if (NumElements == 1) {
695 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
696 ".unpack");
697 NewLoad->setAAMetadata(LI.getAAMetadata());
698 return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
699 PoisonValue::get(T), NewLoad, 0, Name));
700 }
701
702 // We don't want to break loads with padding here, as we'd lose
703 // the knowledge that padding exists for the rest of the pipeline.
704 const DataLayout &DL = IC.getDataLayout();
705 auto *SL = DL.getStructLayout(ST);
706
707 // Don't unpack structures that contain scalable vectors.
708 if (SL->getSizeInBits().isScalable())
709 return nullptr;
710
711 if (SL->hasPadding())
712 return nullptr;
713
714 const auto Align = LI.getAlign();
715 auto *Addr = LI.getPointerOperand();
716 auto *IdxType = Type::getInt32Ty(T->getContext());
717 auto *Zero = ConstantInt::get(IdxType, 0);
718
719 Value *V = PoisonValue::get(T);
720 for (unsigned i = 0; i < NumElements; i++) {
721 Value *Indices[2] = {
722 Zero,
723 ConstantInt::get(IdxType, i),
724 };
725 auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices),
726 Name + ".elt");
727 auto *L = IC.Builder.CreateAlignedLoad(
728 ST->getElementType(i), Ptr,
729 commonAlignment(Align, SL->getElementOffset(i)), Name + ".unpack");
730 // Propagate AA metadata. It'll still be valid on the narrowed load.
731 L->setAAMetadata(LI.getAAMetadata());
732 V = IC.Builder.CreateInsertValue(V, L, i);
733 }
734
735 V->setName(Name);
736 return IC.replaceInstUsesWith(LI, V);
737 }
738
739 if (auto *AT = dyn_cast<ArrayType>(T)) {
740 auto *ET = AT->getElementType();
741 auto NumElements = AT->getNumElements();
742 if (NumElements == 1) {
743 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
744 NewLoad->setAAMetadata(LI.getAAMetadata());
745 return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
746 PoisonValue::get(T), NewLoad, 0, Name));
747 }
748
749 // Bail out if the array is too large. Ideally we would like to optimize
750 // arrays of arbitrary size but this has a terrible impact on compile time.
751 // The threshold here is chosen arbitrarily and may need a bit of
752 // tuning.
753 if (NumElements > IC.MaxArraySizeForCombine)
754 return nullptr;
755
756 const DataLayout &DL = IC.getDataLayout();
757 TypeSize EltSize = DL.getTypeAllocSize(ET);
758 const auto Align = LI.getAlign();
759
760 auto *Addr = LI.getPointerOperand();
761 auto *IdxType = Type::getInt64Ty(T->getContext());
762 auto *Zero = ConstantInt::get(IdxType, 0);
763
764 Value *V = PoisonValue::get(T);
765 TypeSize Offset = TypeSize::getZero();
766 for (uint64_t i = 0; i < NumElements; i++) {
767 Value *Indices[2] = {
768 Zero,
769 ConstantInt::get(IdxType, i),
770 };
771 auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
772 Name + ".elt");
773 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
774 auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
775 EltAlign, Name + ".unpack");
776 L->setAAMetadata(LI.getAAMetadata());
777 V = IC.Builder.CreateInsertValue(V, L, i);
778 Offset += EltSize;
779 }
780
781 V->setName(Name);
782 return IC.replaceInstUsesWith(LI, V);
783 }
784
785 return nullptr;
786}
787
788// If we can determine that all possible objects pointed to by the provided
789// pointer value are not only dereferenceable but also definitively less than
790// or equal to the provided maximum size, then return true. Otherwise, return
791// false. (Constant globals and allocas are objects whose size we can know.)
792//
793// FIXME: This should probably live in ValueTracking (or similar).
794static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
795 const DataLayout &DL) {
796 SmallPtrSet<Value *, 4> Visited;
797 SmallVector<Value *, 4> Worklist(1, V);
798
799 do {
800 Value *P = Worklist.pop_back_val();
801 P = P->stripPointerCasts();
802
803 if (!Visited.insert(P).second)
804 continue;
805
806 if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
807 Worklist.push_back(SI->getTrueValue());
808 Worklist.push_back(SI->getFalseValue());
809 continue;
810 }
811
812 if (PHINode *PN = dyn_cast<PHINode>(P)) {
813 append_range(Worklist, PN->incoming_values());
814 continue;
815 }
816
817 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
818 if (GA->isInterposable())
819 return false;
820 Worklist.push_back(GA->getAliasee());
821 continue;
822 }
823
824 // If we know how big this object is, and it is less than MaxSize, continue
825 // searching. Otherwise, return false.
826 if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
827 if (!AI->getAllocatedType()->isSized())
828 return false;
829
830 ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
831 if (!CS)
832 return false;
833
834 TypeSize TS = DL.getTypeAllocSize(AI->getAllocatedType());
835 if (TS.isScalable())
836 return false;
837 // Make sure that, even if the multiplication below would wrap as an
838 // uint64_t, we still do the right thing.
839 if ((CS->getValue().zext(128) * APInt(128, TS.getFixedValue()))
840 .ugt(MaxSize))
841 return false;
842 continue;
843 }
844
845 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
846 if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
847 return false;
848
849 uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
850 if (InitSize > MaxSize)
851 return false;
852 continue;
853 }
854
855 return false;
856 } while (!Worklist.empty());
857
858 return true;
859}
860
861// If we're indexing into an object of a known size, and the outer index is
862// not a constant, but having any value but zero would lead to undefined
863// behavior, replace it with zero.
864//
865// For example, if we have:
866// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
867// ...
868// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
869// ... = load i32* %arrayidx, align 4
870// Then we know that we can replace %x in the GEP with i64 0.
871//
872// FIXME: We could fold any GEP index to zero that would cause UB if it were
873// not zero. Currently, we only handle the first such index. We could also
874// search through non-zero constant indices if we kept track of the offsets
875// those indices implied.
876static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
877 GetElementPtrInst *GEPI, Instruction *MemI,
878 unsigned &Idx) {
879 if (GEPI->getNumOperands() < 2)
880 return false;
881
882 // Find the first non-zero index of a GEP. If all indices are zero, return
883 // one past the last index.
884 auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
885 unsigned I = 1;
886 for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
887 Value *V = GEPI->getOperand(I);
888 if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
889 if (CI->isZero())
890 continue;
891
892 break;
893 }
894
895 return I;
896 };
897
898 // Skip through initial 'zero' indices, and find the corresponding pointer
899 // type. See if the next index is not a constant.
900 Idx = FirstNZIdx(GEPI);
901 if (Idx == GEPI->getNumOperands())
902 return false;
903 if (isa<Constant>(GEPI->getOperand(Idx)))
904 return false;
905
906 SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
907 Type *SourceElementType = GEPI->getSourceElementType();
908 // Size information about scalable vectors is not available, so we cannot
909 // deduce whether indexing at n is undefined behaviour or not. Bail out.
910 if (SourceElementType->isScalableTy())
911 return false;
912
913 Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
914 if (!AllocTy || !AllocTy->isSized())
915 return false;
916 const DataLayout &DL = IC.getDataLayout();
917 uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedValue();
918
919 // If there are more indices after the one we might replace with a zero, make
920 // sure they're all non-negative. If any of them are negative, the overall
921 // address being computed might be before the base address determined by the
922 // first non-zero index.
923 auto IsAllNonNegative = [&]() {
924 for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
925 KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
926 if (Known.isNonNegative())
927 continue;
928 return false;
929 }
930
931 return true;
932 };
933
934 // FIXME: If the GEP is not inbounds, and there are extra indices after the
935 // one we'll replace, those could cause the address computation to wrap
936 // (rendering the IsAllNonNegative() check below insufficient). We can do
937 // better, ignoring zero indices (and other indices we can prove small
938 // enough not to wrap).
939 if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
940 return false;
941
942 // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
943 // also known to be dereferenceable.
944 return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
945 IsAllNonNegative();
946}
947
948// If we're indexing into an object with a variable index for the memory
949// access, but the object has only one element, we can assume that the index
950// will always be zero. If we replace the GEP, return it.
951static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
952 Instruction &MemI) {
953 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
954 unsigned Idx;
955 if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
956 Instruction *NewGEPI = GEPI->clone();
957 NewGEPI->setOperand(Idx,
958 ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
959 IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator());
960 return NewGEPI;
961 }
962 }
963
964 return nullptr;
965}
966
967static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
968 if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
969 return false;
970
971 auto *Ptr = SI.getPointerOperand();
972 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
973 Ptr = GEPI->getOperand(0);
974 return (isa<ConstantPointerNull>(Ptr) &&
975 !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
976}
977
978static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
979 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
980 const Value *GEPI0 = GEPI->getOperand(0);
981 if (isa<ConstantPointerNull>(GEPI0) &&
982 !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
983 return true;
984 }
985 if (isa<UndefValue>(Op) ||
986 (isa<ConstantPointerNull>(Op) &&
987 !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
988 return true;
989 return false;
990}
991
992Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
993 Value *Op = LI.getOperand(0);
994 if (Value *Res = simplifyLoadInst(&LI, Op, SQ.getWithInstruction(&LI)))
995 return replaceInstUsesWith(LI, Res);
996
997 // Try to canonicalize the loaded type.
998 if (Instruction *Res = combineLoadToOperationType(*this, LI))
999 return Res;
1000
1001 // Replace GEP indices if possible.
1002 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI))
1003 return replaceOperand(LI, 0, NewGEPI);
1004
1005 if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1006 return Res;
1007
1008 // Do really simple store-to-load forwarding and load CSE, to catch cases
1009 // where there are several consecutive memory accesses to the same location,
1010 // separated by a few arithmetic operations.
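  // For example (schematic IR, names invented): in
  //   store i32 %v, ptr %p
  //   %x = add i32 %v, 1
  //   %y = load i32, ptr %p
  // the load %y is replaced by %v.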
1011 bool IsLoadCSE = false;
1012 BatchAAResults BatchAA(*AA);
1013 if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
1014 if (IsLoadCSE)
1015 combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
1016
1017 return replaceInstUsesWith(
1018 LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1019 LI.getName() + ".cast"));
1020 }
1021
1022 // None of the following transforms are legal for volatile/ordered atomic
1023 // loads. Most of them do apply for unordered atomics.
1024 if (!LI.isUnordered()) return nullptr;
1025
1026 // load(gep null, ...) -> unreachable
1027 // load null/undef -> unreachable
1028 // TODO: Consider a target hook for valid address spaces for this xforms.
1029 if (canSimplifyNullLoadOrGEP(LI, Op)) {
1030 CreateNonTerminatorUnreachable(&LI);
1031 return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
1032 }
1033
1034 if (Op->hasOneUse()) {
1035 // Change select and PHI nodes to select values instead of addresses: this
1036 // helps alias analysis out a lot, allows many other simplifications, and
1037 // exposes redundancy in the code.
1038 //
1039 // Note that we cannot do the transformation unless we know that the
1040 // introduced loads cannot trap! Something like this is valid as long as
1041 // the condition is always false: load (select bool %C, int* null, int* %G),
1042 // but it would not be valid if we transformed it to load from null
1043 // unconditionally.
1044 //
1045 if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1046 // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1047 Align Alignment = LI.getAlign();
1048 if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(),
1049 Alignment, DL, SI) &&
1050 isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(),
1051 Alignment, DL, SI)) {
1052 LoadInst *V1 =
1053 Builder.CreateLoad(LI.getType(), SI->getOperand(1),
1054 SI->getOperand(1)->getName() + ".val");
1055 LoadInst *V2 =
1056 Builder.CreateLoad(LI.getType(), SI->getOperand(2),
1057 SI->getOperand(2)->getName() + ".val");
1058 assert(LI.isUnordered() && "implied by above");
1059 V1->setAlignment(Alignment);
1060 V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1061 V2->setAlignment(Alignment);
1062 V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1063 return SelectInst::Create(SI->getCondition(), V1, V2);
1064 }
1065
1066 // load (select (cond, null, P)) -> load P
1067 if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1068 !NullPointerIsDefined(SI->getFunction(),
1069 LI.getPointerAddressSpace()))
1070 return replaceOperand(LI, 0, SI->getOperand(2));
1071
1072 // load (select (cond, P, null)) -> load P
1073 if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1074 !NullPointerIsDefined(SI->getFunction(),
1075 LI.getPointerAddressSpace()))
1076 return replaceOperand(LI, 0, SI->getOperand(1));
1077 }
1078 }
1079 return nullptr;
1080}
1081
1082/// Look for extractelement/insertvalue sequence that acts like a bitcast.
1083///
1084/// \returns underlying value that was "cast", or nullptr otherwise.
1085///
1086/// For example, if we have:
1087///
1088/// %E0 = extractelement <2 x double> %U, i32 0
1089/// %V0 = insertvalue [2 x double] undef, double %E0, 0
1090/// %E1 = extractelement <2 x double> %U, i32 1
1091/// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1092///
1093/// and the layout of a <2 x double> is isomorphic to a [2 x double],
1094/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1095/// Note that %U may contain non-undef values where %V1 has undef.
1096static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) {
1097 Value *U = nullptr;
1098 while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1099 auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1100 if (!E)
1101 return nullptr;
1102 auto *W = E->getVectorOperand();
1103 if (!U)
1104 U = W;
1105 else if (U != W)
1106 return nullptr;
1107 auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1108 if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1109 return nullptr;
1110 V = IV->getAggregateOperand();
1111 }
1112 if (!match(V, m_Undef()) || !U)
1113 return nullptr;
1114
1115 auto *UT = cast<VectorType>(U->getType());
1116 auto *VT = V->getType();
1117 // Check that types UT and VT are bitwise isomorphic.
1118 const auto &DL = IC.getDataLayout();
1119 if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1120 return nullptr;
1121 }
1122 if (auto *AT = dyn_cast<ArrayType>(VT)) {
1123 if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1124 return nullptr;
1125 } else {
1126 auto *ST = cast<StructType>(VT);
1127 if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1128 return nullptr;
1129 for (const auto *EltT : ST->elements()) {
1130 if (EltT != UT->getElementType())
1131 return nullptr;
1132 }
1133 }
1134 return U;
1135}
1136
1137/// Combine stores to match the type of value being stored.
1138///
1139/// The core idea here is that the memory does not have any intrinsic type and
1140/// where we can we should match the type of a store to the type of value being
1141/// stored.
1142///
1143/// However, this routine must never change the width of a store or the number of
1144/// stores as that would introduce a semantic change. This combine is expected to
1145/// be a semantic no-op which just allows stores to more closely model the types
1146/// of their incoming values.
1147///
1148/// Currently, we also refuse to change the precise type used for an atomic or
1149/// volatile store. This is debatable, and might be reasonable to change later.
1150/// However, it is risky in case some backend or other part of LLVM is relying
1151/// on the exact type stored to select appropriate atomic operations.
1152///
1153/// \returns true if the store was successfully combined away. This indicates
1154/// the caller must erase the store instruction. We have to let the caller erase
1155/// the store instruction as otherwise there is no way to signal whether it was
1156/// combined or not: IC.EraseInstFromFunction returns a null pointer.
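///
/// As a hedged sketch (schematic IR, names invented):
///
///   %f = bitcast i32 %x to float
///   store float %f, ptr %p
///
/// becomes "store i32 %x, ptr %p", so the store matches the type of the value
/// actually being produced.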
1157static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
1158 // FIXME: We could probably with some care handle both volatile and ordered
1159 // atomic stores here but it isn't clear that this is important.
1160 if (!SI.isUnordered())
1161 return false;
1162
1163 // swifterror values can't be bitcasted.
1164 if (SI.getPointerOperand()->isSwiftError())
1165 return false;
1166
1167 Value *V = SI.getValueOperand();
1168
1169 // Fold away bit casts of the stored value by storing the original type.
1170 if (auto *BC = dyn_cast<BitCastInst>(V)) {
1171 assert(!BC->getType()->isX86_AMXTy() &&
1172 "store to x86_amx* should not happen!");
1173 V = BC->getOperand(0);
1174 // Don't transform when the type is x86_amx; it keeps the pass that
1175 // lowers the x86_amx type happy.
1176 if (V->getType()->isX86_AMXTy())
1177 return false;
1178 if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1179 combineStoreToNewValue(IC, SI, V);
1180 return true;
1181 }
1182 }
1183
1184 if (Value *U = likeBitCastFromVector(IC, V))
1185 if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1186 combineStoreToNewValue(IC, SI, U);
1187 return true;
1188 }
1189
1190 // FIXME: We should also canonicalize stores of vectors when their elements
1191 // are cast to other types.
1192 return false;
1193}
1194
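// unpackStoreToAggregate is the store-side counterpart of
// unpackLoadToAggregate. Roughly (schematic IR, names invented):
//
//   store { i32, i32 } %agg, ptr %p
//
// becomes
//
//   %agg.elt = extractvalue { i32, i32 } %agg, 0
//   %p.repack = getelementptr inbounds { i32, i32 }, ptr %p, i32 0, i32 0
//   store i32 %agg.elt, ptr %p.repack
//   %agg.elt1 = extractvalue { i32, i32 } %agg, 1
//   %p.repack2 = getelementptr inbounds { i32, i32 }, ptr %p, i32 0, i32 1
//   store i32 %agg.elt1, ptr %p.repack2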
1195static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
1196 // FIXME: We could probably with some care handle both volatile and atomic
1197 // stores here but it isn't clear that this is important.
1198 if (!SI.isSimple())
1199 return false;
1200
1201 Value *V = SI.getValueOperand();
1202 Type *T = V->getType();
1203
1204 if (!T->isAggregateType())
1205 return false;
1206
1207 if (auto *ST = dyn_cast<StructType>(T)) {
1208 // If the struct only has one element, we unpack.
1209 unsigned Count = ST->getNumElements();
1210 if (Count == 1) {
1211 V = IC.Builder.CreateExtractValue(V, 0);
1212 combineStoreToNewValue(IC, SI, V);
1213 return true;
1214 }
1215
1216 // We don't want to break stores with padding here, as we'd lose
1217 // the knowledge that padding exists for the rest of the pipeline.
1218 const DataLayout &DL = IC.getDataLayout();
1219 auto *SL = DL.getStructLayout(ST);
1220
1221 // Don't unpack structures that contain scalable vectors.
1222 if (SL->getSizeInBits().isScalable())
1223 return false;
1224
1225 if (SL->hasPadding())
1226 return false;
1227
1228 const auto Align = SI.getAlign();
1229
1230 SmallString<16> EltName = V->getName();
1231 EltName += ".elt";
1232 auto *Addr = SI.getPointerOperand();
1233 SmallString<16> AddrName = Addr->getName();
1234 AddrName += ".repack";
1235
1236 auto *IdxType = Type::getInt32Ty(ST->getContext());
1237 auto *Zero = ConstantInt::get(IdxType, 0);
1238 for (unsigned i = 0; i < Count; i++) {
1239 Value *Indices[2] = {
1240 Zero,
1241 ConstantInt::get(IdxType, i),
1242 };
1243 auto *Ptr =
1244 IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices), AddrName);
1245 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1246 auto EltAlign = commonAlignment(Align, SL->getElementOffset(i));
1247 llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1248 NS->setAAMetadata(SI.getAAMetadata());
1249 }
1250
1251 return true;
1252 }
1253
1254 if (auto *AT = dyn_cast<ArrayType>(T)) {
1255 // If the array only has one element, we unpack.
1256 auto NumElements = AT->getNumElements();
1257 if (NumElements == 1) {
1258 V = IC.Builder.CreateExtractValue(V, 0);
1259 combineStoreToNewValue(IC, SI, V);
1260 return true;
1261 }
1262
1263 // Bail out if the array is too large. Ideally we would like to optimize
1264 // arrays of arbitrary size but this has a terrible impact on compile time.
1265 // The threshold here is chosen arbitrarily and may need a bit of
1266 // tuning.
1267 if (NumElements > IC.MaxArraySizeForCombine)
1268 return false;
1269
1270 const DataLayout &DL = IC.getDataLayout();
1271 TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
1272 const auto Align = SI.getAlign();
1273
1274 SmallString<16> EltName = V->getName();
1275 EltName += ".elt";
1276 auto *Addr = SI.getPointerOperand();
1277 SmallString<16> AddrName = Addr->getName();
1278 AddrName += ".repack";
1279
1280 auto *IdxType = Type::getInt64Ty(T->getContext());
1281 auto *Zero = ConstantInt::get(IdxType, 0);
1282
1283 TypeSize Offset = TypeSize::getZero();
1284 for (uint64_t i = 0; i < NumElements; i++) {
1285 Value *Indices[2] = {
1286 Zero,
1287 ConstantInt::get(IdxType, i),
1288 };
1289 auto *Ptr =
1290 IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
1291 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1292 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
1293 Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1294 NS->setAAMetadata(SI.getAAMetadata());
1295 Offset += EltSize;
1296 }
1297
1298 return true;
1299 }
1300
1301 return false;
1302}
1303
1304/// equivalentAddressValues - Test if A and B will obviously have the same
1305/// value. This includes recognizing that %t0 and %t1 will have the same
1306/// value in code like this:
1307/// %t0 = getelementptr \@a, 0, 3
1308/// store i32 0, i32* %t0
1309/// %t1 = getelementptr \@a, 0, 3
1310/// %t2 = load i32* %t1
1311///
1312static bool equivalentAddressValues(Value *A, Value *B) {
1313 // Test if the values are trivially equivalent.
1314 if (A == B) return true;
1315
1316 // Test if the values come from identical arithmetic instructions.
1317 // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1318 // it's only used to compare two uses within the same basic block, which
1319 // means that they'll always either have the same value or one of them
1320 // will have an undefined value.
1321 if (isa<BinaryOperator>(A) ||
1322 isa<CastInst>(A) ||
1323 isa<PHINode>(A) ||
1324 isa<GetElementPtrInst>(A))
1325 if (Instruction *BI = dyn_cast<Instruction>(B))
1326 if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1327 return true;
1328
1329 // Otherwise they may not be equivalent.
1330 return false;
1331}
1332
1333Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
1334 Value *Val = SI.getOperand(0);
1335 Value *Ptr = SI.getOperand(1);
1336
1337 // Try to canonicalize the stored type.
1338 if (combineStoreToValueType(*this, SI))
1339 return eraseInstFromFunction(SI);
1340
1341 // Try to canonicalize the stored type.
1342 if (unpackStoreToAggregate(*this, SI))
1343 return eraseInstFromFunction(SI);
1344
1345 // Replace GEP indices if possible.
1346 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI))
1347 return replaceOperand(SI, 1, NewGEPI);
1348
1349 // Don't hack volatile/ordered stores.
1350 // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1351 if (!SI.isUnordered()) return nullptr;
1352
1353 // If the RHS is an alloca with a single use, zapify the store, making the
1354 // alloca dead.
1355 if (Ptr->hasOneUse()) {
1356 if (isa<AllocaInst>(Ptr))
1357 return eraseInstFromFunction(SI);
1358 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1359 if (isa<AllocaInst>(GEP->getOperand(0))) {
1360 if (GEP->getOperand(0)->hasOneUse())
1361 return eraseInstFromFunction(SI);
1362 }
1363 }
1364 }
1365
1366 // If we have a store to a location which is known constant, we can conclude
1367 // that the store must be storing the constant value (else the memory
1368 // wouldn't be constant), and this must be a noop.
1369 if (!isModSet(AA->getModRefInfoMask(Ptr)))
1370 return eraseInstFromFunction(SI);
1371
1372 // Do really simple DSE, to catch cases where there are several consecutive
1373 // stores to the same location, separated by a few arithmetic operations. This
1374 // situation often occurs with bitfield accesses.
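  // For example (schematic IR, names invented): in
  //   store i32 %a, ptr %p
  //   %b = or i32 %a, 1
  //   store i32 %b, ptr %p
  // the first store is dead and is removed here.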
1375 BasicBlock::iterator BBI(SI);
1376 for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1377 --ScanInsts) {
1378 --BBI;
1379 // Don't count debug info directives, lest they affect codegen,
1380 // and skip over pointer-to-pointer bitcasts, which are NOPs.
1381 if (BBI->isDebugOrPseudoInst()) {
1382 ScanInsts++;
1383 continue;
1384 }
1385
1386 if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1387 // Prev store isn't volatile, and stores to the same location?
1388 if (PrevSI->isUnordered() &&
1389 equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1)) &&
1390 PrevSI->getValueOperand()->getType() ==
1391 SI.getValueOperand()->getType()) {
1392 ++NumDeadStore;
1393 // Manually add back the original store to the worklist now, so it will
1394 // be processed after the operands of the removed store, as this may
1395 // expose additional DSE opportunities.
1396 Worklist.push(&SI);
1397 eraseInstFromFunction(*PrevSI);
1398 return nullptr;
1399 }
1400 break;
1401 }
1402
1403 // If this is a load, we have to stop. However, if the load is from the
1404 // pointer we're storing to and produces the value we're storing, then
1405 // *this* store is dead (X = load P; store X -> P).
1406 if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1407 if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1408 assert(SI.isUnordered() && "can't eliminate ordering operation");
1409 return eraseInstFromFunction(SI);
1410 }
1411
1412 // Otherwise, this is a load from some other location. Stores before it
1413 // may not be dead.
1414 break;
1415 }
1416
1417 // Don't skip over loads, throws or things that can modify memory.
1418 if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1419 break;
1420 }
1421
1422 // store X, null -> turns into 'unreachable' in SimplifyCFG
1423 // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1424 if (canSimplifyNullStoreOrGEP(SI)) {
1425 if (!isa<PoisonValue>(Val))
1426 return replaceOperand(SI, 0, PoisonValue::get(Val->getType()));
1427 return nullptr; // Do not modify these!
1428 }
1429
1430 // This is a non-terminator unreachable marker. Don't remove it.
1431 if (isa<UndefValue>(Ptr)) {
1432 // Remove guaranteed-to-transfer instructions before the marker.
1433 if (removeInstructionsBeforeUnreachable(SI))
1434 return &SI;
1435
1436 // Remove all instructions after the marker and handle dead blocks this
1437 // implies.
1438 SmallVector<BasicBlock *> Worklist;
1439 handleUnreachableFrom(SI.getNextNode(), Worklist);
1440 handlePotentiallyDeadBlocks(Worklist);
1441 return nullptr;
1442 }
1443
1444 // store undef, Ptr -> noop
1445 // FIXME: This is technically incorrect because it might overwrite a poison
1446 // value. Change to PoisonValue once #52930 is resolved.
1447 if (isa<UndefValue>(Val))
1448 return eraseInstFromFunction(SI);
1449
1450 return nullptr;
1451}
1452
1453/// Try to transform:
1454/// if () { *P = v1; } else { *P = v2 }
1455/// or:
1456/// *P = v1; if () { *P = v2; }
1457/// into a phi node with a store in the successor.
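///
/// As a rough illustration (schematic IR, names invented) of the
/// if/then/else form:
///
///   then:
///     store i32 %v1, ptr %p
///     br label %merge
///   else:
///     store i32 %v2, ptr %p
///     br label %merge
///   merge:
///     ...
///
/// becomes a phi of %v1 and %v2 in %merge followed by a single store to %p.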
1458bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
1459 if (!SI.isUnordered())
1460 return false; // This code has not been audited for volatile/ordered case.
1461
1462 // Check if the successor block has exactly 2 incoming edges.
1463 BasicBlock *StoreBB = SI.getParent();
1464 BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1465 if (!DestBB->hasNPredecessors(2))
1466 return false;
1467
1468 // Capture the other block (the block that doesn't contain our store).
1469 pred_iterator PredIter = pred_begin(DestBB);
1470 if (*PredIter == StoreBB)
1471 ++PredIter;
1472 BasicBlock *OtherBB = *PredIter;
1473
1474 // Bail out if the relevant blocks aren't all distinct. This can happen,
1475 // for example, if SI is in an infinite loop.
1476 if (StoreBB == DestBB || OtherBB == DestBB)
1477 return false;
1478
1479 // Verify that the other block ends in a branch and is not otherwise empty.
1480 BasicBlock::iterator BBI(OtherBB->getTerminator());
1481 BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1482 if (!OtherBr || BBI == OtherBB->begin())
1483 return false;
1484
1485 auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool {
1486 if (!OtherStore ||
1487 OtherStore->getPointerOperand() != SI.getPointerOperand())
1488 return false;
1489
1490 auto *SIVTy = SI.getValueOperand()->getType();
1491 auto *OSVTy = OtherStore->getValueOperand()->getType();
1492 return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) &&
1493 SI.hasSameSpecialState(OtherStore);
1494 };
1495
1496 // If the other block ends in an unconditional branch, check for the 'if then
1497 // else' case. There is an instruction before the branch.
1498 StoreInst *OtherStore = nullptr;
1499 if (OtherBr->isUnconditional()) {
1500 --BBI;
1501 // Skip over debugging info and pseudo probes.
1502 while (BBI->isDebugOrPseudoInst()) {
1503 if (BBI==OtherBB->begin())
1504 return false;
1505 --BBI;
1506 }
1507 // If this isn't a store, isn't a store to the same location, or is not the
1508 // right kind of store, bail out.
1509 OtherStore = dyn_cast<StoreInst>(BBI);
1510 if (!OtherStoreIsMergeable(OtherStore))
1511 return false;
1512 } else {
1513 // Otherwise, the other block ended with a conditional branch. If one of the
1514 // destinations is StoreBB, then we have the if/then case.
1515 if (OtherBr->getSuccessor(0) != StoreBB &&
1516 OtherBr->getSuccessor(1) != StoreBB)
1517 return false;
1518
1519 // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1520 // if/then triangle. See if there is a store to the same ptr as SI that
1521 // lives in OtherBB.
1522 for (;; --BBI) {
1523 // Check to see if we find the matching store.
1524 OtherStore = dyn_cast<StoreInst>(BBI);
1525 if (OtherStoreIsMergeable(OtherStore))
1526 break;
1527
1528 // If we find something that may be using or overwriting the stored
1529 // value, or if we run out of instructions, we can't do the transform.
1530 if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1531 BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1532 return false;
1533 }
1534
1535 // In order to eliminate the store in OtherBr, we have to make sure nothing
1536 // reads or overwrites the stored value in StoreBB.
1537 for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1538 // FIXME: This should really be AA driven.
1539 if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1540 return false;
1541 }
1542 }
1543
1544 // Insert a PHI node now if we need it.
1545 Value *MergedVal = OtherStore->getValueOperand();
1546 // The debug locations of the original instructions might differ. Merge them.
1547 DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
1548 OtherStore->getDebugLoc());
1549 if (MergedVal != SI.getValueOperand()) {
1550 PHINode *PN =
1551 PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge");
1552 PN->addIncoming(SI.getValueOperand(), SI.getParent());
1553 Builder.SetInsertPoint(OtherStore);
1554 PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()),
1555 OtherBB);
1556 MergedVal = InsertNewInstBefore(PN, DestBB->begin());
1557 PN->setDebugLoc(MergedLoc);
1558 }
1559
1560 // Advance to a place where it is safe to insert the new store and insert it.
1561 BBI = DestBB->getFirstInsertionPt();
1562 StoreInst *NewSI =
1563 new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(),
1564 SI.getOrdering(), SI.getSyncScopeID());
1565 InsertNewInstBefore(NewSI, BBI);
1566 NewSI->setDebugLoc(MergedLoc);
1567 NewSI->mergeDIAssignID({&SI, OtherStore});
1568
1569 // If the two stores had AA tags, merge them.
1570 AAMDNodes AATags = SI.getAAMetadata();
1571 if (AATags)
1572 NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));
1573
1574 // Nuke the old stores.
1575 eraseInstFromFunction(SI);
1576 eraseInstFromFunction(*OtherStore);
1577 return true;
1578}
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file provides internal interfaces used to implement the InstCombine.
static StoreInst * combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI, Value *V)
Combine a store to a new type.
static Instruction * combineLoadToOperationType(InstCombinerImpl &IC, LoadInst &Load)
Combine loads to match the type of their uses' value after looking through intervening bitcasts.
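As a hypothetical illustration of this canonicalization (names invented; the exact legality checks are the ones performed by the implementation), a load whose value is only reinterpreted can be rewritten to load in the use's type directly:

  %x = load i32, ptr %p
  %f = bitcast i32 %x to float
  ; may become
  %f = load float, ptr %p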
static Instruction * replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr, Instruction &MemI)
static Instruction * simplifyAllocaArraySize(InstCombinerImpl &IC, AllocaInst &AI, DominatorTree &DT)
static bool canSimplifyNullStoreOrGEP(StoreInst &SI)
static bool equivalentAddressValues(Value *A, Value *B)
equivalentAddressValues - Test if A and B will obviously have the same value.
static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC, GetElementPtrInst *GEPI, Instruction *MemI, unsigned &Idx)
static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op)
static bool isSupportedAtomicType(Type *Ty)
static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI, const DataLayout &DL)
Returns true if V is dereferenceable for size of alloca.
static Instruction * unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI)
static cl::opt< unsigned > MaxCopiedFromConstantUsers("instcombine-max-copied-from-constant-users", cl::init(300), cl::desc("Maximum users to visit in copy from constant transform"), cl::Hidden)
static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI)
Combine stores to match the type of value being stored.
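A sketch of the store-side counterpart, under the same caveat (names invented; the real preconditions are those enforced by the code): a store of reinterpreted bits can be rewritten to store the original value in its own type.

  %bits = bitcast float %f to i32
  store i32 %bits, ptr %p
  ; may become
  store float %f, ptr %p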
static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI)
static Value * likeBitCastFromVector(InstCombinerImpl &IC, Value *V)
Look for extractelement/insertvalue sequence that acts like a bitcast.
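For example (an illustrative sketch only; the exact shapes accepted are decided by the implementation), the following extractelement/insertvalue chain rebuilds the elements of %v, so the final aggregate carries the same bits as %v reinterpreted as [2 x float]:

  %e0 = extractelement <2 x float> %v, i64 0
  %e1 = extractelement <2 x float> %v, i64 1
  %a0 = insertvalue [2 x float] poison, float %e0, 0
  %a1 = insertvalue [2 x float] %a0, float %e1, 1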
static bool isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V, MemTransferInst *&TheCopy, SmallVectorImpl< Instruction * > &ToDelete)
isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived) pointer to an alloca.
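A sketch of the situation this walk looks for (all names hypothetical):

  @lut = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]

  %buf = alloca [4 x i32]
  call void @llvm.memcpy.p0.p0.i64(ptr %buf, ptr @lut, i64 16, i1 false)
  %x = load i32, ptr %buf
  ; if %buf is only ever read apart from the memcpy that fills it, the reads
  ; can be served from @lut instead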
static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, const DataLayout &DL)
This file provides the interface for the instcombine pass implementation.
This file implements a map that provides insertion order iteration.
This file defines generic set operations that may be used on sets of different types,...
This file defines the SmallString class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, bool IgnoreLocals=false)
Returns a bitmask that should be unconditionally applied to the ModRef info of a memory location.
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Definition: Instructions.h:63
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:124
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:99
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:117
bool isUsedWithInAlloca() const
Return true if this alloca is used as an inalloca argument to a call.
Definition: Instructions.h:139
unsigned getAddressSpace() const
Return the address space for the allocation.
Definition: Instructions.h:104
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
Definition: Instructions.h:128
const Value * getArraySize() const
Get the number of elements allocated.
Definition: Instructions.h:95
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:481
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:386
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Conditional or Unconditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
Definition: DataLayout.cpp:878
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:457
A debug info location.
Definition: DebugLoc.h:33
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:956
static Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
Type * getSourceElementType() const
Definition: Instructions.h:990
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Definition: IRBuilder.h:1781
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2562
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1815
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1882
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2234
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1798
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2225
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:199
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1834
void handleUnreachableFrom(Instruction *I, SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * visitLoadInst(LoadInst &LI)
void handlePotentiallyDeadBlocks(SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitStoreInst(StoreInst &SI)
bool mergeStoreIntoSuccessor(StoreInst &SI)
Try to transform: if () { *P = v1; } else { *P = v2 } or: *P = v1; if () { *P = v2; } into a phi node...
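A rough IR sketch of the second form (block and value names invented; the first, diamond form is illustrated after the source listing above):

  ; before: *P = v1; if (c) { *P = v2; }
  entry:
    store i32 %v1, ptr %P
    br i1 %c, label %then, label %cont
  then:
    store i32 %v2, ptr %P
    br label %cont

  ; after (conceptually)
  cont:
    %storemerge = phi i32 [ %v1, %entry ], [ %v2, %then ]
    store i32 %storemerge, ptr %P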
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
bool removeInstructionsBeforeUnreachable(Instruction &I)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
Instruction * visitAllocSite(Instruction &FI)
Instruction * visitAllocaInst(AllocaInst &AI)
SimplifyQuery SQ
Definition: InstCombiner.h:77
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:337
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Definition: InstCombiner.h:368
AAResults * AA
Definition: InstCombiner.h:70
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:388
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
Definition: InstCombiner.h:56
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:65
const DataLayout & DL
Definition: InstCombiner.h:76
AssumptionCache & AC
Definition: InstCombiner.h:73
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:412
DominatorTree & DT
Definition: InstCombiner.h:75
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
Definition: InstCombiner.h:433
BuilderTy & Builder
Definition: InstCombiner.h:61
void push(Instruction *I)
Push the instruction onto the worklist stack.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void mergeDIAssignID(ArrayRef< const Instruction * > SourceInstructions)
Merge the DIAssignID metadata from this instruction and those attached to instructions in SourceInstr...
Definition: DebugInfo.cpp:953
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1764
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:72
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1750
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
void setAlignment(Align Align)
Definition: Instructions.h:215
Value * getPointerOperand()
Definition: Instructions.h:255
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:205
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Definition: Instructions.h:241
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:220
bool isUnordered() const
Definition: Instructions.h:249
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:230
bool isSimple() const
Definition: Instructions.h:247
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
Metadata node.
Definition: Metadata.h:1069
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
This class wraps the llvm.memcpy/memmove intrinsics.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PointerIntPair - This class implements a pair of a pointer and small integer.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
bool empty() const
Definition: SmallVector.h:81
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
Value * getValueOperand()
Definition: Instructions.h:378
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:364
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getZero()
Definition: TypeSize.h:351
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:310
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:267
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition: Type.h:200
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:252
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
void setOperand(unsigned i, Value *Val)
Definition: User.h:233
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, Align Alignment, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested.
Definition: Loads.cpp:214
void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source)
Copy the metadata from the source instruction to the destination (the replacement for the source inst...
Definition: Local.cpp:3448
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:491
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1581
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1187
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
Definition: Loads.cpp:384
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2787
Value * simplifyLoadInst(LoadInst *LI, Value *PtrOp, const SimplifyQuery &Q)
Given a load instruction and its pointer operand, fold the result or return null.
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3439
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1866
auto pred_begin(const MachineBasicBlock *BB)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:100
SimplifyQuery getWithInstruction(const Instruction *I) const