InstCombineLoadStoreAlloca.cpp
1//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for load, store and alloca.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/MapVector.h"
15#include "llvm/ADT/SetOperations.h"
16#include "llvm/ADT/SmallString.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/Loads.h"
20#include "llvm/IR/DataLayout.h"
21#include "llvm/IR/DebugInfoMetadata.h"
22#include "llvm/IR/IntrinsicInst.h"
23#include "llvm/IR/LLVMContext.h"
24#include "llvm/IR/PatternMatch.h"
25#include "llvm/Transforms/InstCombine/InstCombiner.h"
26#include "llvm/Transforms/Utils/Local.h"
27using namespace llvm;
28using namespace PatternMatch;
29
30#define DEBUG_TYPE "instcombine"
31
32STATISTIC(NumDeadStore, "Number of dead stores eliminated");
33STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
34
35static cl::opt<unsigned> MaxCopiedFromConstantUsers(
36 "instcombine-max-copied-from-constant-users", cl::init(300),
37 cl::desc("Maximum users to visit in copy from constant transform"),
38 cl::Hidden);
39
40/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
41/// pointer to an alloca. Ignore any reads of the pointer, return false if we
42/// see any stores or other unknown uses. If we see pointer arithmetic, keep
43/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
44/// the uses. If we see a memcpy/memmove that targets an un-offset pointer to
45/// the alloca, and if the source pointer is a pointer to a constant memory
46/// location, we can optimize this.
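/// For illustration, a hypothetical pattern this is meant to recognize (the
/// names @g and %buf are made up, not taken from this file):
///   %buf = alloca [32 x i8]
///   call void @llvm.memcpy.p0.p0.i64(ptr %buf, ptr @g, i64 32, i1 false)
///   %x = load i8, ptr %buf
/// Every use of %buf is either a read or the destination of the single
/// memcpy, so if @g is constant memory the loads can read from @g directly.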
47static bool
48isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V,
49 MemTransferInst *&TheCopy,
50 SmallVectorImpl<Instruction *> &ToDelete) {
51 // We track lifetime intrinsics as we encounter them. If we decide to go
52 // ahead and replace the value with the memory location, this lets the caller
53 // quickly eliminate the markers.
54
55 using ValueAndIsOffset = PointerIntPair<Value *, 1, bool>;
56 SmallVector<ValueAndIsOffset, 32> Worklist;
57 SmallPtrSet<ValueAndIsOffset, 32> Visited;
58 Worklist.emplace_back(V, false);
59 while (!Worklist.empty()) {
60 ValueAndIsOffset Elem = Worklist.pop_back_val();
61 if (!Visited.insert(Elem).second)
62 continue;
63 if (Visited.size() > MaxCopiedFromConstantUsers)
64 return false;
65
66 const auto [Value, IsOffset] = Elem;
67 for (auto &U : Value->uses()) {
68 auto *I = cast<Instruction>(U.getUser());
69
70 if (auto *LI = dyn_cast<LoadInst>(I)) {
71 // Ignore non-volatile loads, they are always ok.
72 if (!LI->isSimple()) return false;
73 continue;
74 }
75
76 if (isa<PHINode, SelectInst>(I)) {
77 // We set IsOffset=true, to forbid the memcpy from occurring after the
78 // phi: If one of the phi operands is not based on the alloca, we
79 // would incorrectly omit a write.
80 Worklist.emplace_back(I, true);
81 continue;
82 }
83 if (isa<BitCastInst, AddrSpaceCastInst>(I)) {
84 // If uses of the bitcast are ok, we are ok.
85 Worklist.emplace_back(I, IsOffset);
86 continue;
87 }
88 if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
89 // If the GEP has all zero indices, it doesn't offset the pointer. If it
90 // doesn't, it does.
91 Worklist.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
92 continue;
93 }
94
95 if (auto *Call = dyn_cast<CallBase>(I)) {
96 // If this is the function being called then we treat it like a load and
97 // ignore it.
98 if (Call->isCallee(&U))
99 continue;
100
101 unsigned DataOpNo = Call->getDataOperandNo(&U);
102 bool IsArgOperand = Call->isArgOperand(&U);
103
104 // Inalloca arguments are clobbered by the call.
105 if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
106 return false;
107
108 // If this call site doesn't modify the memory, then we know it is just
109 // a load (but one that potentially returns the value itself), so we can
110 // ignore it if we know that the value isn't captured.
111 bool NoCapture = Call->doesNotCapture(DataOpNo);
112 if ((Call->onlyReadsMemory() && (Call->use_empty() || NoCapture)) ||
113 (Call->onlyReadsMemory(DataOpNo) && NoCapture))
114 continue;
115
116 // If this is being passed as a byval argument, the caller is making a
117 // copy, so it is only a read of the alloca.
118 if (IsArgOperand && Call->isByValArgument(DataOpNo))
119 continue;
120 }
121
122 // Lifetime intrinsics can be handled by the caller.
123 if (I->isLifetimeStartOrEnd()) {
124 assert(I->use_empty() && "Lifetime markers have no result to use!");
125 ToDelete.push_back(I);
126 continue;
127 }
128
129 // If this isn't our memcpy/memmove, reject it as something we can't
130 // handle.
131 MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
132 if (!MI)
133 return false;
134
135 // If the transfer is volatile, reject it.
136 if (MI->isVolatile())
137 return false;
138
139 // If the transfer is using the alloca as a source of the transfer, then
140 // ignore it since it is a load (unless the transfer is volatile).
141 if (U.getOperandNo() == 1)
142 continue;
143
144 // If we already have seen a copy, reject the second one.
145 if (TheCopy) return false;
146
147 // If the pointer has been offset from the start of the alloca, we can't
148 // safely handle this.
149 if (IsOffset) return false;
150
151 // If the memintrinsic isn't using the alloca as the dest, reject it.
152 if (U.getOperandNo() != 0) return false;
153
154 // If the source of the memcpy/move is not constant, reject it.
155 if (isModSet(AA->getModRefInfoMask(MI->getSource())))
156 return false;
157
158 // Otherwise, the transform is safe. Remember the copy instruction.
159 TheCopy = MI;
160 }
161 }
162 return true;
163}
164
165/// isOnlyCopiedFromConstantMemory - Return true if the specified alloca is only
166/// modified by a copy from a constant memory location. If we can prove this, we
167/// can replace any uses of the alloca with uses of the memory location
168/// directly.
169static MemTransferInst *
170isOnlyCopiedFromConstantMemory(AAResults *AA,
171 AllocaInst *AI,
172 SmallVectorImpl<Instruction *> &ToDelete) {
173 MemTransferInst *TheCopy = nullptr;
174 if (isOnlyCopiedFromConstantMemory(AA, AI, TheCopy, ToDelete))
175 return TheCopy;
176 return nullptr;
177}
178
179/// Returns true if V is dereferenceable for size of alloca.
180static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
181 const DataLayout &DL) {
182 if (AI->isArrayAllocation())
183 return false;
184 uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
185 if (!AllocaSize)
186 return false;
187 return isDereferenceableAndAlignedPointer(V, AI->getAlign(),
188 APInt(64, AllocaSize), DL);
189}
190
191static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
192 AllocaInst &AI, DominatorTree &DT) {
193 // Check for array size of 1 (scalar allocation).
194 if (!AI.isArrayAllocation()) {
195 // i32 1 is the canonical array size for scalar allocations.
196 if (AI.getArraySize()->getType()->isIntegerTy(32))
197 return nullptr;
198
199 // Canonicalize it.
200 return IC.replaceOperand(AI, 0, IC.Builder.getInt32(1));
201 }
202
203 // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
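 // For example (hypothetical IR): "alloca i32, i32 4" becomes
 // "alloca [4 x i32]", with the count folded into the allocated type.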
204 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
205 if (C->getValue().getActiveBits() <= 64) {
206 Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
207 AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
208 nullptr, AI.getName());
209 New->setAlignment(AI.getAlign());
210 New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
211
212 replaceAllDbgUsesWith(AI, *New, *New, DT);
213 return IC.replaceInstUsesWith(AI, New);
214 }
215 }
216
217 if (isa<UndefValue>(AI.getArraySize()))
218 return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
219
220 // Ensure that the alloca array size argument has type equal to the offset
221 // size of the alloca() pointer, which, in the typical case, is intptr_t,
222 // so that any casting is exposed early.
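 // For example (hypothetical IR, assuming a 64-bit index type): for
 // "alloca i32, i8 %n" the %n operand is first widened with
 // "%n.ext = zext i8 %n to i64" so the cast is visible to later folds.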
223 Type *PtrIdxTy = IC.getDataLayout().getIndexType(AI.getType());
224 if (AI.getArraySize()->getType() != PtrIdxTy) {
225 Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), PtrIdxTy, false);
226 return IC.replaceOperand(AI, 0, V);
227 }
228
229 return nullptr;
230}
231
232namespace {
233// If I and V are pointers in different address spaces, it is not allowed to
234// use replaceAllUsesWith since I and V have different types. A
235// non-target-specific transformation should not use addrspacecast on V since
236// the two address spaces may be disjoint depending on the target.
237//
238// This class chases down uses of the old pointer until reaching the load
239// instructions, then replaces the old pointer in the load instructions with
240// the new pointer. If during the chasing it sees bitcast or GEP, it will
241// create new bitcast or GEP with the new pointer and use them in the load
242// instruction.
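//
// For illustration (hypothetical IR, address spaces chosen arbitrarily): if
// %buf, an alloca in addrspace(5), is only initialized by a memcpy from a
// constant global @g in addrspace(4), then uses such as
//   %p = getelementptr i8, ptr addrspace(5) %buf, i64 8
//   %v = load i32, ptr addrspace(5) %p
// are recreated as a GEP and load on @g's addrspace(4) pointer instead of
// introducing an addrspacecast of @g into addrspace(5).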
243class PointerReplacer {
244public:
245 PointerReplacer(InstCombinerImpl &IC, Instruction &Root, unsigned SrcAS)
246 : IC(IC), Root(Root), FromAS(SrcAS) {}
247
248 bool collectUsers();
249 void replacePointer(Value *V);
250
251private:
252 bool collectUsersRecursive(Instruction &I);
253 void replace(Instruction *I);
254 Value *getReplacement(Value *I);
255 bool isAvailable(Instruction *I) const {
256 return I == &Root || Worklist.contains(I);
257 }
258
259 bool isEqualOrValidAddrSpaceCast(const Instruction *I,
260 unsigned FromAS) const {
261 const auto *ASC = dyn_cast<AddrSpaceCastInst>(I);
262 if (!ASC)
263 return false;
264 unsigned ToAS = ASC->getDestAddressSpace();
265 return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
266 }
267
268 SmallPtrSet<Instruction *, 32> ValuesToRevisit;
269 SmallSetVector<Instruction *, 4> Worklist;
270 MapVector<Value *, Value *> WorkMap;
271 InstCombinerImpl &IC;
272 Instruction &Root;
273 unsigned FromAS;
274};
275} // end anonymous namespace
276
277bool PointerReplacer::collectUsers() {
278 if (!collectUsersRecursive(Root))
279 return false;
280
281 // Ensure that all outstanding (indirect) users of I
282 // are inserted into the Worklist. Return false
283 // otherwise.
284 return llvm::set_is_subset(ValuesToRevisit, Worklist);
285}
286
287bool PointerReplacer::collectUsersRecursive(Instruction &I) {
288 for (auto *U : I.users()) {
289 auto *Inst = cast<Instruction>(&*U);
290 if (auto *Load = dyn_cast<LoadInst>(Inst)) {
291 if (Load->isVolatile())
292 return false;
293 Worklist.insert(Load);
294 } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
295 // All incoming values must be instructions for replaceability.
296 if (any_of(PHI->incoming_values(),
297 [](Value *V) { return !isa<Instruction>(V); }))
298 return false;
299
300 // If at least one incoming value of the PHI is not in Worklist,
301 // store the PHI for revisiting and skip this iteration of the
302 // loop.
303 if (any_of(PHI->incoming_values(), [this](Value *V) {
304 return !isAvailable(cast<Instruction>(V));
305 })) {
306 ValuesToRevisit.insert(Inst);
307 continue;
308 }
309
310 Worklist.insert(PHI);
311 if (!collectUsersRecursive(*PHI))
312 return false;
313 } else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
314 if (!isa<Instruction>(SI->getTrueValue()) ||
315 !isa<Instruction>(SI->getFalseValue()))
316 return false;
317
318 if (!isAvailable(cast<Instruction>(SI->getTrueValue())) ||
319 !isAvailable(cast<Instruction>(SI->getFalseValue()))) {
320 ValuesToRevisit.insert(Inst);
321 continue;
322 }
323 Worklist.insert(SI);
324 if (!collectUsersRecursive(*SI))
325 return false;
326 } else if (isa<GetElementPtrInst>(Inst)) {
327 Worklist.insert(Inst);
328 if (!collectUsersRecursive(*Inst))
329 return false;
330 } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
331 if (MI->isVolatile())
332 return false;
333 Worklist.insert(Inst);
334 } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
335 Worklist.insert(Inst);
336 if (!collectUsersRecursive(*Inst))
337 return false;
338 } else if (Inst->isLifetimeStartOrEnd()) {
339 continue;
340 } else {
341 // TODO: For arbitrary uses with address space mismatches, should we check
342 // if we can introduce a valid addrspacecast?
343 LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
344 return false;
345 }
346 }
347
348 return true;
349}
350
351Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }
352
353void PointerReplacer::replace(Instruction *I) {
354 if (getReplacement(I))
355 return;
356
357 if (auto *LT = dyn_cast<LoadInst>(I)) {
358 auto *V = getReplacement(LT->getPointerOperand());
359 assert(V && "Operand not replaced");
360 auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
361 LT->getAlign(), LT->getOrdering(),
362 LT->getSyncScopeID());
363 NewI->takeName(LT);
364 copyMetadataForLoad(*NewI, *LT);
365
366 IC.InsertNewInstWith(NewI, LT->getIterator());
367 IC.replaceInstUsesWith(*LT, NewI);
368 WorkMap[LT] = NewI;
369 } else if (auto *PHI = dyn_cast<PHINode>(I)) {
370 Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
371 auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
372 PHI->getName(), PHI->getIterator());
373 for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I)
374 NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)),
375 PHI->getIncomingBlock(I));
376 WorkMap[PHI] = NewPHI;
377 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
378 auto *V = getReplacement(GEP->getPointerOperand());
379 assert(V && "Operand not replaced");
380 SmallVector<Value *, 8> Indices(GEP->indices());
381 auto *NewI =
382 GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
383 IC.InsertNewInstWith(NewI, GEP->getIterator());
384 NewI->takeName(GEP);
385 NewI->setNoWrapFlags(GEP->getNoWrapFlags());
386 WorkMap[GEP] = NewI;
387 } else if (auto *SI = dyn_cast<SelectInst>(I)) {
388 Value *TrueValue = SI->getTrueValue();
389 Value *FalseValue = SI->getFalseValue();
390 if (Value *Replacement = getReplacement(TrueValue))
391 TrueValue = Replacement;
392 if (Value *Replacement = getReplacement(FalseValue))
393 FalseValue = Replacement;
394 auto *NewSI = SelectInst::Create(SI->getCondition(), TrueValue, FalseValue,
395 SI->getName(), nullptr, SI);
396 IC.InsertNewInstWith(NewSI, SI->getIterator());
397 NewSI->takeName(SI);
398 WorkMap[SI] = NewSI;
399 } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
400 auto *DestV = MemCpy->getRawDest();
401 auto *SrcV = MemCpy->getRawSource();
402
403 if (auto *DestReplace = getReplacement(DestV))
404 DestV = DestReplace;
405 if (auto *SrcReplace = getReplacement(SrcV))
406 SrcV = SrcReplace;
407
408 IC.Builder.SetInsertPoint(MemCpy);
409 auto *NewI = IC.Builder.CreateMemTransferInst(
410 MemCpy->getIntrinsicID(), DestV, MemCpy->getDestAlign(), SrcV,
411 MemCpy->getSourceAlign(), MemCpy->getLength(), MemCpy->isVolatile());
412 AAMDNodes AAMD = MemCpy->getAAMetadata();
413 if (AAMD)
414 NewI->setAAMetadata(AAMD);
415
416 IC.eraseInstFromFunction(*MemCpy);
417 WorkMap[MemCpy] = NewI;
418 } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(I)) {
419 auto *V = getReplacement(ASC->getPointerOperand());
420 assert(V && "Operand not replaced");
421 assert(isEqualOrValidAddrSpaceCast(
422 ASC, V->getType()->getPointerAddressSpace()) &&
423 "Invalid address space cast!");
424
425 if (V->getType()->getPointerAddressSpace() !=
426 ASC->getType()->getPointerAddressSpace()) {
427 auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), "");
428 NewI->takeName(ASC);
429 IC.InsertNewInstWith(NewI, ASC->getIterator());
430 WorkMap[ASC] = NewI;
431 } else {
432 WorkMap[ASC] = V;
433 }
434
435 } else {
436 llvm_unreachable("should never reach here");
437 }
438}
439
440void PointerReplacer::replacePointer(Value *V) {
441#ifndef NDEBUG
442 auto *PT = cast<PointerType>(Root.getType());
443 auto *NT = cast<PointerType>(V->getType());
444 assert(PT != NT && "Invalid usage");
445#endif
446 WorkMap[&Root] = V;
447
448 for (Instruction *Workitem : Worklist)
449 replace(Workitem);
450}
451
452Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
453 if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
454 return I;
455
456 if (AI.getAllocatedType()->isSized()) {
457 // Move all allocas of zero byte objects to the entry block and merge them
458 // together. Note that we only do this for allocas, because malloc should
459 // allocate and return a unique pointer, even for a zero byte allocation.
460 if (DL.getTypeAllocSize(AI.getAllocatedType()).getKnownMinValue() == 0) {
461 // For a zero sized alloca there is no point in doing an array allocation.
462 // This is helpful if the array size is a complicated expression not used
463 // elsewhere.
464 if (AI.isArrayAllocation())
465 return replaceOperand(AI, 0,
466 ConstantInt::get(AI.getArraySize()->getType(), 1));
467
468 // Get the first instruction in the entry block.
469 BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
470 Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
471 if (FirstInst != &AI) {
472 // If the entry block doesn't start with a zero-size alloca then move
473 // this one to the start of the entry block. There is no problem with
474 // dominance as the array size was forced to a constant earlier already.
475 AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
476 if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
477 DL.getTypeAllocSize(EntryAI->getAllocatedType())
478 .getKnownMinValue() != 0) {
479 AI.moveBefore(FirstInst);
480 return &AI;
481 }
482
483 // Replace this zero-sized alloca with the one at the start of the entry
484 // block after ensuring that the address will be aligned enough for both
485 // types.
486 const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
487 EntryAI->setAlignment(MaxAlign);
488 return replaceInstUsesWith(AI, EntryAI);
489 }
490 }
491 }
492
493 // Check to see if this allocation is only modified by a memcpy/memmove from
494 // a memory location whose alignment is equal to or exceeds that of the
495 // allocation. If this is the case, we can change all users to use the
496 // constant memory location instead. This is commonly produced by the CFE by
497 // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
498 // is only subsequently read.
499 SmallVector<Instruction *, 4> ToDelete;
500 if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
501 Value *TheSrc = Copy->getSource();
502 Align AllocaAlign = AI.getAlign();
503 Align SourceAlign = getOrEnforceKnownAlignment(
504 TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
505 if (AllocaAlign <= SourceAlign &&
506 isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
507 !isa<Instruction>(TheSrc)) {
508 // FIXME: Can we sink instructions without violating dominance when TheSrc
509 // is an instruction instead of a constant or argument?
510 LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
511 LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
512 unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
513 if (AI.getAddressSpace() == SrcAddrSpace) {
514 for (Instruction *Delete : ToDelete)
515 eraseInstFromFunction(*Delete);
516
517 Instruction *NewI = replaceInstUsesWith(AI, TheSrc);
518 eraseInstFromFunction(*Copy);
519 ++NumGlobalCopies;
520 return NewI;
521 }
522
523 PointerReplacer PtrReplacer(*this, AI, SrcAddrSpace);
524 if (PtrReplacer.collectUsers()) {
525 for (Instruction *Delete : ToDelete)
526 eraseInstFromFunction(*Delete);
527
528 PtrReplacer.replacePointer(TheSrc);
529 ++NumGlobalCopies;
530 }
531 }
532 }
533
534 // At last, use the generic allocation site handler to aggressively remove
535 // unused allocas.
536 return visitAllocSite(AI);
537}
538
539// Are we allowed to form an atomic load or store of this type?
540static bool isSupportedAtomicType(Type *Ty) {
541 return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
542}
543
544/// Helper to combine a load to a new type.
545///
546/// This just does the work of combining a load to a new type. It handles
547/// metadata, etc., and returns the new instruction. The \c NewTy should be the
548/// loaded *value* type. This will convert it to a pointer, cast the operand to
549/// that pointer type, load it, etc.
550///
551/// Note that this will create all of the instructions with whatever insert
552/// point the \c InstCombinerImpl currently is using.
553LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy,
554 const Twine &Suffix) {
555 assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
556 "can't fold an atomic load to requested type");
557
558 LoadInst *NewLoad =
559 Builder.CreateAlignedLoad(NewTy, LI.getPointerOperand(), LI.getAlign(),
560 LI.isVolatile(), LI.getName() + Suffix);
561 NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
562 copyMetadataForLoad(*NewLoad, LI);
563 return NewLoad;
564}
565
566/// Combine a store to a new type.
567///
568/// Returns the newly created store instruction.
569static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
570 Value *V) {
571 assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
572 "can't fold an atomic store of requested type");
573
574 Value *Ptr = SI.getPointerOperand();
575 SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
576 SI.getAllMetadata(MD);
577
578 StoreInst *NewStore =
579 IC.Builder.CreateAlignedStore(V, Ptr, SI.getAlign(), SI.isVolatile());
580 NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
581 for (const auto &MDPair : MD) {
582 unsigned ID = MDPair.first;
583 MDNode *N = MDPair.second;
584 // Note, essentially every kind of metadata should be preserved here! This
585 // routine is supposed to clone a store instruction changing *only its
586 // type*. The only metadata it makes sense to drop is metadata which is
587 // invalidated when the pointer type changes. This should essentially
588 // never be the case in LLVM, but we explicitly switch over only known
589 // metadata to be conservatively correct. If you are adding metadata to
590 // LLVM which pertains to stores, you almost certainly want to add it
591 // here.
592 switch (ID) {
593 case LLVMContext::MD_dbg:
594 case LLVMContext::MD_DIAssignID:
595 case LLVMContext::MD_tbaa:
596 case LLVMContext::MD_prof:
597 case LLVMContext::MD_fpmath:
598 case LLVMContext::MD_tbaa_struct:
599 case LLVMContext::MD_alias_scope:
600 case LLVMContext::MD_noalias:
601 case LLVMContext::MD_nontemporal:
602 case LLVMContext::MD_mem_parallel_loop_access:
603 case LLVMContext::MD_access_group:
604 // All of these directly apply.
605 NewStore->setMetadata(ID, N);
606 break;
607 case LLVMContext::MD_invariant_load:
608 case LLVMContext::MD_nonnull:
609 case LLVMContext::MD_noundef:
610 case LLVMContext::MD_range:
611 case LLVMContext::MD_align:
612 case LLVMContext::MD_dereferenceable:
613 case LLVMContext::MD_dereferenceable_or_null:
614 // These don't apply for stores.
615 break;
616 }
617 }
618
619 return NewStore;
620}
621
622/// Combine loads to match the type of their uses' value after looking
623/// through intervening bitcasts.
624///
625/// The core idea here is that if the result of a load is used in an operation,
626/// we should load the type most conducive to that operation. For example, when
627/// loading an integer and converting that immediately to a pointer, we should
628/// instead directly load a pointer.
629///
630/// However, this routine must never change the width of a load or the number of
631/// loads as that would introduce a semantic change. This combine is expected to
632/// be a semantic no-op which just allows loads to more closely model the types
633/// of their consuming operations.
634///
635/// Currently, we also refuse to change the precise type used for an atomic load
636/// or a volatile load. This is debatable, and might be reasonable to change
637/// later. However, it is risky in case some backend or other part of LLVM is
638/// relying on the exact type loaded to select appropriate atomic operations.
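///
/// For illustration (hypothetical IR): a pattern such as
///   %f = load float, ptr %p
///   %i = bitcast float %f to i32
/// can be rewritten to load the integer directly:
///   %i = load i32, ptr %p
/// The width of the access is unchanged; only the loaded type is.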
639static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
640 LoadInst &Load) {
641 // FIXME: We could probably with some care handle both volatile and ordered
642 // atomic loads here but it isn't clear that this is important.
643 if (!Load.isUnordered())
644 return nullptr;
645
646 if (Load.use_empty())
647 return nullptr;
648
649 // swifterror values can't be bitcasted.
650 if (Load.getPointerOperand()->isSwiftError())
651 return nullptr;
652
653 // Fold away bit casts of the loaded value by loading the desired type.
654 // Note that we should not do this for pointer<->integer casts,
655 // because that would result in type punning.
656 if (Load.hasOneUse()) {
657 // Don't transform when the type is x86_amx; that keeps the pass that
658 // lowers the x86_amx type happy.
659 Type *LoadTy = Load.getType();
660 if (auto *BC = dyn_cast<BitCastInst>(Load.user_back())) {
661 assert(!LoadTy->isX86_AMXTy() && "Load from x86_amx* should not happen!");
662 if (BC->getType()->isX86_AMXTy())
663 return nullptr;
664 }
665
666 if (auto *CastUser = dyn_cast<CastInst>(Load.user_back())) {
667 Type *DestTy = CastUser->getDestTy();
668 if (CastUser->isNoopCast(IC.getDataLayout()) &&
669 LoadTy->isPtrOrPtrVectorTy() == DestTy->isPtrOrPtrVectorTy() &&
670 (!Load.isAtomic() || isSupportedAtomicType(DestTy))) {
671 LoadInst *NewLoad = IC.combineLoadToNewType(Load, DestTy);
672 CastUser->replaceAllUsesWith(NewLoad);
673 IC.eraseInstFromFunction(*CastUser);
674 return &Load;
675 }
676 }
677 }
678
679 // FIXME: We should also canonicalize loads of vectors when their elements are
680 // cast to other types.
681 return nullptr;
682}
683
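// unpackLoadToAggregate splits a load of a small aggregate into loads of its
// elements. For illustration (hypothetical IR), a load of a two-field struct
//   %agg = load { i32, i32 }, ptr %p
// becomes roughly
//   %e0 = load i32, ptr %p
//   %p1 = getelementptr inbounds { i32, i32 }, ptr %p, i32 0, i32 1
//   %e1 = load i32, ptr %p1
//   %v0 = insertvalue { i32, i32 } poison, i32 %e0, 0
//   %agg = insertvalue { i32, i32 } %v0, i32 %e1, 1
// provided the struct has no padding and no scalable parts.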
684static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
685 // FIXME: We could probably with some care handle both volatile and atomic
686 // stores here but it isn't clear that this is important.
687 if (!LI.isSimple())
688 return nullptr;
689
690 Type *T = LI.getType();
691 if (!T->isAggregateType())
692 return nullptr;
693
694 StringRef Name = LI.getName();
695
696 if (auto *ST = dyn_cast<StructType>(T)) {
697 // If the struct has only one element, we unpack.
698 auto NumElements = ST->getNumElements();
699 if (NumElements == 1) {
700 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
701 ".unpack");
702 NewLoad->setAAMetadata(LI.getAAMetadata());
703 return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
704 PoisonValue::get(T), NewLoad, 0, Name));
705 }
706
707 // We don't want to break loads with padding here as we'd lose
708 // the knowledge that padding exists for the rest of the pipeline.
709 const DataLayout &DL = IC.getDataLayout();
710 auto *SL = DL.getStructLayout(ST);
711
712 // Don't unpack structures containing scalable vectors.
713 if (SL->getSizeInBits().isScalable())
714 return nullptr;
715
716 if (SL->hasPadding())
717 return nullptr;
718
719 const auto Align = LI.getAlign();
720 auto *Addr = LI.getPointerOperand();
721 auto *IdxType = Type::getInt32Ty(T->getContext());
722 auto *Zero = ConstantInt::get(IdxType, 0);
723
724 Value *V = PoisonValue::get(T);
725 for (unsigned i = 0; i < NumElements; i++) {
726 Value *Indices[2] = {
727 Zero,
728 ConstantInt::get(IdxType, i),
729 };
730 auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices),
731 Name + ".elt");
732 auto *L = IC.Builder.CreateAlignedLoad(
733 ST->getElementType(i), Ptr,
734 commonAlignment(Align, SL->getElementOffset(i)), Name + ".unpack");
735 // Propagate AA metadata. It'll still be valid on the narrowed load.
736 L->setAAMetadata(LI.getAAMetadata());
737 V = IC.Builder.CreateInsertValue(V, L, i);
738 }
739
740 V->setName(Name);
741 return IC.replaceInstUsesWith(LI, V);
742 }
743
744 if (auto *AT = dyn_cast<ArrayType>(T)) {
745 auto *ET = AT->getElementType();
746 auto NumElements = AT->getNumElements();
747 if (NumElements == 1) {
748 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
749 NewLoad->setAAMetadata(LI.getAAMetadata());
750 return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
751 PoisonValue::get(T), NewLoad, 0, Name));
752 }
753
754 // Bail out if the array is too large. Ideally we would like to optimize
755 // arrays of arbitrary size but this has a terrible impact on compile time.
756 // The threshold here is chosen arbitrarily, maybe needs a little bit of
757 // tuning.
758 if (NumElements > IC.MaxArraySizeForCombine)
759 return nullptr;
760
761 const DataLayout &DL = IC.getDataLayout();
762 TypeSize EltSize = DL.getTypeAllocSize(ET);
763 const auto Align = LI.getAlign();
764
765 auto *Addr = LI.getPointerOperand();
766 auto *IdxType = Type::getInt64Ty(T->getContext());
767 auto *Zero = ConstantInt::get(IdxType, 0);
768
769 Value *V = PoisonValue::get(T);
770 TypeSize Offset = TypeSize::getZero();
771 for (uint64_t i = 0; i < NumElements; i++) {
772 Value *Indices[2] = {
773 Zero,
774 ConstantInt::get(IdxType, i),
775 };
776 auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
777 Name + ".elt");
778 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
779 auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
780 EltAlign, Name + ".unpack");
781 L->setAAMetadata(LI.getAAMetadata());
782 V = IC.Builder.CreateInsertValue(V, L, i);
783 Offset += EltSize;
784 }
785
786 V->setName(Name);
787 return IC.replaceInstUsesWith(LI, V);
788 }
789
790 return nullptr;
791}
792
793// If we can determine that all possible objects pointed to by the provided
794// pointer value are, not only dereferenceable, but also definitively less than
795// or equal to the provided maximum size, then return true. Otherwise, return
796// false (constant global values and allocas fall into this category).
797//
798// FIXME: This should probably live in ValueTracking (or similar).
799static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
800 const DataLayout &DL) {
801 SmallPtrSet<Value *, 4> Visited;
802 SmallVector<Value *, 4> Worklist(1, V);
803
804 do {
805 Value *P = Worklist.pop_back_val();
806 P = P->stripPointerCasts();
807
808 if (!Visited.insert(P).second)
809 continue;
810
811 if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
812 Worklist.push_back(SI->getTrueValue());
813 Worklist.push_back(SI->getFalseValue());
814 continue;
815 }
816
817 if (PHINode *PN = dyn_cast<PHINode>(P)) {
818 append_range(Worklist, PN->incoming_values());
819 continue;
820 }
821
822 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
823 if (GA->isInterposable())
824 return false;
825 Worklist.push_back(GA->getAliasee());
826 continue;
827 }
828
829 // If we know how big this object is, and it is less than MaxSize, continue
830 // searching. Otherwise, return false.
831 if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
832 if (!AI->getAllocatedType()->isSized())
833 return false;
834
835 ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
836 if (!CS)
837 return false;
838
839 TypeSize TS = DL.getTypeAllocSize(AI->getAllocatedType());
840 if (TS.isScalable())
841 return false;
842 // Make sure that, even if the multiplication below would wrap as a
843 // uint64_t, we still do the right thing.
844 if ((CS->getValue().zext(128) * APInt(128, TS.getFixedValue()))
845 .ugt(MaxSize))
846 return false;
847 continue;
848 }
849
850 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
851 if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
852 return false;
853
854 uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
855 if (InitSize > MaxSize)
856 return false;
857 continue;
858 }
859
860 return false;
861 } while (!Worklist.empty());
862
863 return true;
864}
865
866// If we're indexing into an object of a known size, and the outer index is
867// not a constant, but having any value but zero would lead to undefined
868// behavior, replace it with zero.
869//
870// For example, if we have:
871// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
872// ...
873// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
874// ... = load i32* %arrayidx, align 4
875// Then we know that we can replace %x in the GEP with i64 0.
876//
877// FIXME: We could fold any GEP index to zero that would cause UB if it were
878// not zero. Currently, we only handle the first such index. We could also
879// search through non-zero constant indices if we kept track of the
880// offsets those indices implied.
881static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
882 GetElementPtrInst *GEPI, Instruction *MemI,
883 unsigned &Idx) {
884 if (GEPI->getNumOperands() < 2)
885 return false;
886
887 // Find the first non-zero index of a GEP. If all indices are zero, return
888 // one past the last index.
889 auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
890 unsigned I = 1;
891 for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
892 Value *V = GEPI->getOperand(I);
893 if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
894 if (CI->isZero())
895 continue;
896
897 break;
898 }
899
900 return I;
901 };
902
903 // Skip through initial 'zero' indices, and find the corresponding pointer
904 // type. See if the next index is not a constant.
905 Idx = FirstNZIdx(GEPI);
906 if (Idx == GEPI->getNumOperands())
907 return false;
908 if (isa<Constant>(GEPI->getOperand(Idx)))
909 return false;
910
911 SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
912 Type *SourceElementType = GEPI->getSourceElementType();
913 // Size information about scalable vectors is not available, so we cannot
914 // deduce whether indexing at n is undefined behaviour or not. Bail out.
915 if (SourceElementType->isScalableTy())
916 return false;
917
918 Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
919 if (!AllocTy || !AllocTy->isSized())
920 return false;
921 const DataLayout &DL = IC.getDataLayout();
922 uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedValue();
923
924 // If there are more indices after the one we might replace with a zero, make
925 // sure they're all non-negative. If any of them are negative, the overall
926 // address being computed might be before the base address determined by the
927 // first non-zero index.
928 auto IsAllNonNegative = [&]() {
929 for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
930 KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
931 if (Known.isNonNegative())
932 continue;
933 return false;
934 }
935
936 return true;
937 };
938
939 // FIXME: If the GEP is not inbounds, and there are extra indices after the
940 // one we'll replace, those could cause the address computation to wrap
941 // (rendering the IsAllNonNegative() check below insufficient). We can do
942 // better, ignoring zero indices (and other indices we can prove small
943 // enough not to wrap).
944 if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
945 return false;
946
947 // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
948 // also known to be dereferenceable.
949 return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
950 IsAllNonNegative();
951}
952
953// If we're indexing into an object with a variable index for the memory
954// access, but the object has only one element, we can assume that the index
955// will always be zero. If we replace the GEP, return it.
956static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
957 Instruction &MemI) {
958 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
959 unsigned Idx;
960 if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
961 Instruction *NewGEPI = GEPI->clone();
962 NewGEPI->setOperand(Idx,
963 ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
964 IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator());
965 return NewGEPI;
966 }
967 }
968
969 return nullptr;
970}
971
972static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
973 if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
974 return false;
975
976 auto *Ptr = SI.getPointerOperand();
977 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
978 Ptr = GEPI->getOperand(0);
979 return (isa<ConstantPointerNull>(Ptr) &&
980 !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
981}
982
983static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
984 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
985 const Value *GEPI0 = GEPI->getOperand(0);
986 if (isa<ConstantPointerNull>(GEPI0) &&
987 !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
988 return true;
989 }
990 if (isa<UndefValue>(Op) ||
991 (isa<ConstantPointerNull>(Op) &&
992 !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
993 return true;
994 return false;
995}
996
997Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
998 Value *Op = LI.getOperand(0);
999 if (Value *Res = simplifyLoadInst(&LI, Op, SQ.getWithInstruction(&LI)))
1000 return replaceInstUsesWith(LI, Res);
1001
1002 // Try to canonicalize the loaded type.
1003 if (Instruction *Res = combineLoadToOperationType(*this, LI))
1004 return Res;
1005
1006 // Replace GEP indices if possible.
1007 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI))
1008 return replaceOperand(LI, 0, NewGEPI);
1009
1010 if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1011 return Res;
1012
1013 // Do really simple store-to-load forwarding and load CSE, to catch cases
1014 // where there are several consecutive memory accesses to the same location,
1015 // separated by a few arithmetic operations.
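 // For illustration (hypothetical IR): in
 //   store i32 %x, ptr %p
 //   %y = load i32, ptr %p
 // the load can be replaced by %x even with unrelated arithmetic in between,
 // as long as nothing in between may write to %p.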
1016 bool IsLoadCSE = false;
1017 BatchAAResults BatchAA(*AA);
1018 if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
1019 if (IsLoadCSE)
1020 combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
1021
1022 return replaceInstUsesWith(
1023 LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1024 LI.getName() + ".cast"));
1025 }
1026
1027 // None of the following transforms are legal for volatile/ordered atomic
1028 // loads. Most of them do apply for unordered atomics.
1029 if (!LI.isUnordered()) return nullptr;
1030
1031 // load(gep null, ...) -> unreachable
1032 // load null/undef -> unreachable
1033 // TODO: Consider a target hook for valid address spaces for this xforms.
1034 if (canSimplifyNullLoadOrGEP(LI, Op)) {
1035 CreateNonTerminatorUnreachable(&LI);
1036 return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
1037 }
1038
1039 if (Op->hasOneUse()) {
1040 // Change select and PHI nodes to select values instead of addresses: this
1041 // helps alias analysis out a lot, allows many other simplifications, and
1042 // exposes redundancy in the code.
1043 //
1044 // Note that we cannot do the transformation unless we know that the
1045 // introduced loads cannot trap! Something like this is valid as long as
1046 // the condition is always false: load (select bool %C, int* null, int* %G),
1047 // but it would not be valid if we transformed it to load from null
1048 // unconditionally.
1049 //
1050 if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1051 // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1052 Align Alignment = LI.getAlign();
1053 if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(),
1054 Alignment, DL, SI) &&
1055 isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(),
1056 Alignment, DL, SI)) {
1057 LoadInst *V1 =
1058 Builder.CreateLoad(LI.getType(), SI->getOperand(1),
1059 SI->getOperand(1)->getName() + ".val");
1060 LoadInst *V2 =
1061 Builder.CreateLoad(LI.getType(), SI->getOperand(2),
1062 SI->getOperand(2)->getName() + ".val");
1063 assert(LI.isUnordered() && "implied by above");
1064 V1->setAlignment(Alignment);
1065 V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1066 V2->setAlignment(Alignment);
1067 V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1068 return SelectInst::Create(SI->getCondition(), V1, V2);
1069 }
1070
1071 // load (select (cond, null, P)) -> load P
1072 if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1073 !NullPointerIsDefined(SI->getFunction(),
1074 LI.getPointerAddressSpace()))
1075 return replaceOperand(LI, 0, SI->getOperand(2));
1076
1077 // load (select (cond, P, null)) -> load P
1078 if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1079 !NullPointerIsDefined(SI->getFunction(),
1080 LI.getPointerAddressSpace()))
1081 return replaceOperand(LI, 0, SI->getOperand(1));
1082 }
1083 }
1084 return nullptr;
1085}
1086
1087/// Look for extractelement/insertvalue sequence that acts like a bitcast.
1088///
1089/// \returns underlying value that was "cast", or nullptr otherwise.
1090///
1091/// For example, if we have:
1092///
1093/// %E0 = extractelement <2 x double> %U, i32 0
1094/// %V0 = insertvalue [2 x double] undef, double %E0, 0
1095/// %E1 = extractelement <2 x double> %U, i32 1
1096/// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1097///
1098/// and the layout of a <2 x double> is isomorphic to a [2 x double],
1099/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1100/// Note that %U may contain non-undef values where %V1 has undef.
1101static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) {
1102 Value *U = nullptr;
1103 while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1104 auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1105 if (!E)
1106 return nullptr;
1107 auto *W = E->getVectorOperand();
1108 if (!U)
1109 U = W;
1110 else if (U != W)
1111 return nullptr;
1112 auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1113 if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1114 return nullptr;
1115 V = IV->getAggregateOperand();
1116 }
1117 if (!match(V, m_Undef()) || !U)
1118 return nullptr;
1119
1120 auto *UT = cast<VectorType>(U->getType());
1121 auto *VT = V->getType();
1122 // Check that types UT and VT are bitwise isomorphic.
1123 const auto &DL = IC.getDataLayout();
1124 if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1125 return nullptr;
1126 }
1127 if (auto *AT = dyn_cast<ArrayType>(VT)) {
1128 if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1129 return nullptr;
1130 } else {
1131 auto *ST = cast<StructType>(VT);
1132 if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1133 return nullptr;
1134 for (const auto *EltT : ST->elements()) {
1135 if (EltT != UT->getElementType())
1136 return nullptr;
1137 }
1138 }
1139 return U;
1140}
1141
1142/// Combine stores to match the type of value being stored.
1143///
1144/// The core idea here is that the memory does not have any intrinsic type and
1145/// where we can we should match the type of a store to the type of value being
1146/// stored.
1147///
1148/// However, this routine must never change the width of a store or the number of
1149/// stores as that would introduce a semantic change. This combine is expected to
1150/// be a semantic no-op which just allows stores to more closely model the types
1151/// of their incoming values.
1152///
1153/// Currently, we also refuse to change the precise type used for an atomic or
1154/// volatile store. This is debatable, and might be reasonable to change later.
1155/// However, it is risky in case some backend or other part of LLVM is relying
1156/// on the exact type stored to select appropriate atomic operations.
1157///
1158/// \returns true if the store was successfully combined away. This indicates
1159/// the caller must erase the store instruction. We have to let the caller erase
1160/// the store instruction as otherwise there is no way to signal whether it was
1161/// combined or not: IC.EraseInstFromFunction returns a null pointer.
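///
/// For illustration (hypothetical IR): a pattern such as
///   %i = bitcast double %d to i64
///   store i64 %i, ptr %p
/// is rewritten to store the original value directly:
///   store double %d, ptr %p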
1162static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
1163 // FIXME: We could probably with some care handle both volatile and ordered
1164 // atomic stores here but it isn't clear that this is important.
1165 if (!SI.isUnordered())
1166 return false;
1167
1168 // swifterror values can't be bitcasted.
1169 if (SI.getPointerOperand()->isSwiftError())
1170 return false;
1171
1172 Value *V = SI.getValueOperand();
1173
1174 // Fold away bit casts of the stored value by storing the original type.
1175 if (auto *BC = dyn_cast<BitCastInst>(V)) {
1176 assert(!BC->getType()->isX86_AMXTy() &&
1177 "store to x86_amx* should not happen!");
1178 V = BC->getOperand(0);
1179 // Don't transform when the type is x86_amx; that keeps the pass that
1180 // lowers the x86_amx type happy.
1181 if (V->getType()->isX86_AMXTy())
1182 return false;
1183 if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1184 combineStoreToNewValue(IC, SI, V);
1185 return true;
1186 }
1187 }
1188
1189 if (Value *U = likeBitCastFromVector(IC, V))
1190 if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1191 combineStoreToNewValue(IC, SI, U);
1192 return true;
1193 }
1194
1195 // FIXME: We should also canonicalize stores of vectors when their elements
1196 // are cast to other types.
1197 return false;
1198}
1199
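// unpackStoreToAggregate splits a store of a small aggregate into stores of
// its elements. For illustration (hypothetical IR),
//   store { i32, i32 } %agg, ptr %p
// becomes roughly
//   %e0 = extractvalue { i32, i32 } %agg, 0
//   store i32 %e0, ptr %p
//   %p1 = getelementptr inbounds { i32, i32 }, ptr %p, i32 0, i32 1
//   %e1 = extractvalue { i32, i32 } %agg, 1
//   store i32 %e1, ptr %p1
// provided the struct has no padding and the element count is small.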
1200static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
1201 // FIXME: We could probably with some care handle both volatile and atomic
1202 // stores here but it isn't clear that this is important.
1203 if (!SI.isSimple())
1204 return false;
1205
1206 Value *V = SI.getValueOperand();
1207 Type *T = V->getType();
1208
1209 if (!T->isAggregateType())
1210 return false;
1211
1212 if (auto *ST = dyn_cast<StructType>(T)) {
1213 // If the struct has only one element, we unpack.
1214 unsigned Count = ST->getNumElements();
1215 if (Count == 1) {
1216 V = IC.Builder.CreateExtractValue(V, 0);
1217 combineStoreToNewValue(IC, SI, V);
1218 return true;
1219 }
1220
1221 // We don't want to break stores with padding here as we'd lose
1222 // the knowledge that padding exists for the rest of the pipeline.
1223 const DataLayout &DL = IC.getDataLayout();
1224 auto *SL = DL.getStructLayout(ST);
1225
1226 // Don't unpack structures containing scalable vectors.
1227 if (SL->getSizeInBits().isScalable())
1228 return false;
1229
1230 if (SL->hasPadding())
1231 return false;
1232
1233 const auto Align = SI.getAlign();
1234
1235 SmallString<16> EltName = V->getName();
1236 EltName += ".elt";
1237 auto *Addr = SI.getPointerOperand();
1238 SmallString<16> AddrName = Addr->getName();
1239 AddrName += ".repack";
1240
1241 auto *IdxType = Type::getInt32Ty(ST->getContext());
1242 auto *Zero = ConstantInt::get(IdxType, 0);
1243 for (unsigned i = 0; i < Count; i++) {
1244 Value *Indices[2] = {
1245 Zero,
1246 ConstantInt::get(IdxType, i),
1247 };
1248 auto *Ptr =
1249 IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices), AddrName);
1250 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1251 auto EltAlign = commonAlignment(Align, SL->getElementOffset(i));
1252 llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1253 NS->setAAMetadata(SI.getAAMetadata());
1254 }
1255
1256 return true;
1257 }
1258
1259 if (auto *AT = dyn_cast<ArrayType>(T)) {
1260 // If the array has only one element, we unpack.
1261 auto NumElements = AT->getNumElements();
1262 if (NumElements == 1) {
1263 V = IC.Builder.CreateExtractValue(V, 0);
1264 combineStoreToNewValue(IC, SI, V);
1265 return true;
1266 }
1267
1268 // Bail out if the array is too large. Ideally we would like to optimize
1269 // arrays of arbitrary size but this has a terrible impact on compile time.
1270 // The threshold here is chosen arbitrarily, maybe needs a little bit of
1271 // tuning.
1272 if (NumElements > IC.MaxArraySizeForCombine)
1273 return false;
1274
1275 const DataLayout &DL = IC.getDataLayout();
1276 TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
1277 const auto Align = SI.getAlign();
1278
1279 SmallString<16> EltName = V->getName();
1280 EltName += ".elt";
1281 auto *Addr = SI.getPointerOperand();
1282 SmallString<16> AddrName = Addr->getName();
1283 AddrName += ".repack";
1284
1285 auto *IdxType = Type::getInt64Ty(T->getContext());
1286 auto *Zero = ConstantInt::get(IdxType, 0);
1287
1288 TypeSize Offset = TypeSize::getZero();
1289 for (uint64_t i = 0; i < NumElements; i++) {
1290 Value *Indices[2] = {
1291 Zero,
1292 ConstantInt::get(IdxType, i),
1293 };
1294 auto *Ptr =
1295 IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
1296 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1297 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
1298 Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1299 NS->setAAMetadata(SI.getAAMetadata());
1300 Offset += EltSize;
1301 }
1302
1303 return true;
1304 }
1305
1306 return false;
1307}
1308
1309/// equivalentAddressValues - Test if A and B will obviously have the same
1310/// value. This includes recognizing that %t0 and %t1 will have the same
1311/// value in code like this:
1312/// %t0 = getelementptr \@a, 0, 3
1313/// store i32 0, i32* %t0
1314/// %t1 = getelementptr \@a, 0, 3
1315/// %t2 = load i32* %t1
1316///
1317static bool equivalentAddressValues(Value *A, Value *B) {
1318 // Test if the values are trivially equivalent.
1319 if (A == B) return true;
1320
1321 // Test if the values come from identical arithmetic instructions.
1322 // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1323 // it's only used to compare two uses within the same basic block, which
1324 // means that they'll always either have the same value or one of them
1325 // will have an undefined value.
1326 if (isa<BinaryOperator>(A) ||
1327 isa<CastInst>(A) ||
1328 isa<PHINode>(A) ||
1329 isa<GetElementPtrInst>(A))
1330 if (Instruction *BI = dyn_cast<Instruction>(B))
1331 if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1332 return true;
1333
1334 // Otherwise they may not be equivalent.
1335 return false;
1336}
1337
1338Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
1339 Value *Val = SI.getOperand(0);
1340 Value *Ptr = SI.getOperand(1);
1341
1342 // Try to canonicalize the stored type.
1343 if (combineStoreToValueType(*this, SI))
1344 return eraseInstFromFunction(SI);
1345
1346 // Try to unpack aggregate stores into their elements.
1347 if (unpackStoreToAggregate(*this, SI))
1348 return eraseInstFromFunction(SI);
1349
1350 // Replace GEP indices if possible.
1351 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI))
1352 return replaceOperand(SI, 1, NewGEPI);
1353
1354 // Don't hack volatile/ordered stores.
1355 // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1356 if (!SI.isUnordered()) return nullptr;
1357
1358 // If the RHS is an alloca with a single use, zapify the store, making the
1359 // alloca dead.
1360 if (Ptr->hasOneUse()) {
1361 if (isa<AllocaInst>(Ptr))
1362 return eraseInstFromFunction(SI);
1363 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1364 if (isa<AllocaInst>(GEP->getOperand(0))) {
1365 if (GEP->getOperand(0)->hasOneUse())
1366 return eraseInstFromFunction(SI);
1367 }
1368 }
1369 }
1370
1371 // If we have a store to a location which is known constant, we can conclude
1372 // that the store must be storing the constant value (else the memory
1373 // wouldn't be constant), and this must be a noop.
1374 if (!isModSet(AA->getModRefInfoMask(Ptr)))
1375 return eraseInstFromFunction(SI);
1376
1377 // Do really simple DSE, to catch cases where there are several consecutive
1378 // stores to the same location, separated by a few arithmetic operations. This
1379 // situation often occurs with bitfield accesses.
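 // For illustration (hypothetical IR): in
 //   store i32 %a, ptr %p
 //   %b = add i32 %a, 1
 //   store i32 %b, ptr %p
 // the first store is dead and can be removed, since no load or other memory
 // operation separates the two stores.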
1380 BasicBlock::iterator BBI(SI);
1381 for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1382 --ScanInsts) {
1383 --BBI;
1384 // Don't count debug info directives, lest they affect codegen,
1385 // and we skip pointer-to-pointer bitcasts, which are NOPs.
1386 if (BBI->isDebugOrPseudoInst()) {
1387 ScanInsts++;
1388 continue;
1389 }
1390
1391 if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1392 // Prev store isn't volatile, and stores to the same location?
1393 if (PrevSI->isUnordered() &&
1394 equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1)) &&
1395 PrevSI->getValueOperand()->getType() ==
1396 SI.getValueOperand()->getType()) {
1397 ++NumDeadStore;
1398 // Manually add back the original store to the worklist now, so it will
1399 // be processed after the operands of the removed store, as this may
1400 // expose additional DSE opportunities.
1401 Worklist.push(&SI);
1402 eraseInstFromFunction(*PrevSI);
1403 return nullptr;
1404 }
1405 break;
1406 }
1407
1408 // If this is a load, we have to stop. However, if the loaded value is from
1409 // the pointer we're loading and is producing the pointer we're storing,
1410 // then *this* store is dead (X = load P; store X -> P).
1411 if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1412 if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1413 assert(SI.isUnordered() && "can't eliminate ordering operation");
1414 return eraseInstFromFunction(SI);
1415 }
1416
1417 // Otherwise, this is a load from some other location. Stores before it
1418 // may not be dead.
1419 break;
1420 }
1421
1422 // Don't skip over loads, throws or things that can modify memory.
1423 if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1424 break;
1425 }
1426
1427 // store X, null -> turns into 'unreachable' in SimplifyCFG
1428 // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1429 if (canSimplifyNullStoreOrGEP(SI)) {
1430 if (!isa<PoisonValue>(Val))
1431 return replaceOperand(SI, 0, PoisonValue::get(Val->getType()));
1432 return nullptr; // Do not modify these!
1433 }
1434
1435 // This is a non-terminator unreachable marker. Don't remove it.
1436 if (isa<UndefValue>(Ptr)) {
1437 // Remove guaranteed-to-transfer instructions before the marker.
1438 if (removeInstructionsBeforeUnreachable(SI))
1439 return &SI;
1440
1441 // Remove all instructions after the marker and handle dead blocks this
1442 // implies.
1443 SmallVector<BasicBlock *> Worklist;
1444 handleUnreachableFrom(SI.getNextNode(), Worklist);
1445 handlePotentiallyDeadBlocks(Worklist);
1446 return nullptr;
1447 }
1448
1449 // store undef, Ptr -> noop
1450 // FIXME: This is technically incorrect because it might overwrite a poison
1451 // value. Change to PoisonValue once #52930 is resolved.
1452 if (isa<UndefValue>(Val))
1453 return eraseInstFromFunction(SI);
1454
1455 return nullptr;
1456}
1457
1458/// Try to transform:
1459/// if () { *P = v1; } else { *P = v2 }
1460/// or:
1461/// *P = v1; if () { *P = v2; }
1462/// into a phi node with a store in the successor.
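///
/// For illustration (hypothetical IR): with both predecessors storing to %p,
///   then:  store i32 %v1, ptr %p
///   else:  store i32 %v2, ptr %p
/// the two stores are replaced in the common successor by
///   %merged = phi i32 [ %v1, %then ], [ %v2, %else ]
///   store i32 %merged, ptr %p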
1463bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
1464 if (!SI.isUnordered())
1465 return false; // This code has not been audited for volatile/ordered case.
1466
1467 // Check if the successor block has exactly 2 incoming edges.
1468 BasicBlock *StoreBB = SI.getParent();
1469 BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1470 if (!DestBB->hasNPredecessors(2))
1471 return false;
1472
1473 // Capture the other block (the block that doesn't contain our store).
1474 pred_iterator PredIter = pred_begin(DestBB);
1475 if (*PredIter == StoreBB)
1476 ++PredIter;
1477 BasicBlock *OtherBB = *PredIter;
1478
1479 // Bail out if all of the relevant blocks aren't distinct. This can happen,
1480 // for example, if SI is in an infinite loop.
1481 if (StoreBB == DestBB || OtherBB == DestBB)
1482 return false;
1483
1484 // Verify that the other block ends in a branch and is not otherwise empty.
1485 BasicBlock::iterator BBI(OtherBB->getTerminator());
1486 BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1487 if (!OtherBr || BBI == OtherBB->begin())
1488 return false;
1489
1490 auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool {
1491 if (!OtherStore ||
1492 OtherStore->getPointerOperand() != SI.getPointerOperand())
1493 return false;
1494
1495 auto *SIVTy = SI.getValueOperand()->getType();
1496 auto *OSVTy = OtherStore->getValueOperand()->getType();
1497 return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) &&
1498 SI.hasSameSpecialState(OtherStore);
1499 };
1500
1501 // If the other block ends in an unconditional branch, check for the 'if then
1502 // else' case. There is an instruction before the branch.
1503 StoreInst *OtherStore = nullptr;
1504 if (OtherBr->isUnconditional()) {
1505 --BBI;
1506 // Skip over debugging info and pseudo probes.
1507 while (BBI->isDebugOrPseudoInst()) {
1508 if (BBI==OtherBB->begin())
1509 return false;
1510 --BBI;
1511 }
1512 // If this isn't a store, isn't a store to the same location, or is not the
1513 // right kind of store, bail out.
1514 OtherStore = dyn_cast<StoreInst>(BBI);
1515 if (!OtherStoreIsMergeable(OtherStore))
1516 return false;
1517 } else {
1518 // Otherwise, the other block ended with a conditional branch. If one of the
1519 // destinations is StoreBB, then we have the if/then case.
1520 if (OtherBr->getSuccessor(0) != StoreBB &&
1521 OtherBr->getSuccessor(1) != StoreBB)
1522 return false;
1523
1524 // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1525 // if/then triangle. See if there is a store to the same ptr as SI that
1526 // lives in OtherBB.
1527 for (;; --BBI) {
1528 // Check to see if we find the matching store.
1529 OtherStore = dyn_cast<StoreInst>(BBI);
1530 if (OtherStoreIsMergeable(OtherStore))
1531 break;
1532
1533 // If we find something that may be using or overwriting the stored
1534 // value, or if we run out of instructions, we can't do the transform.
1535 if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1536 BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1537 return false;
1538 }
1539
1540 // In order to eliminate the store in OtherBB, we have to make sure nothing
1541 // reads or overwrites the stored value in StoreBB.
1542 for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1543 // FIXME: This should really be AA driven.
1544 if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1545 return false;
1546 }
1547 }
1548
1549 // Insert a PHI node now if we need it.
1550 Value *MergedVal = OtherStore->getValueOperand();
1551 // The debug locations of the original instructions might differ. Merge them.
1552 DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
1553 OtherStore->getDebugLoc());
1554 if (MergedVal != SI.getValueOperand()) {
1555 PHINode *PN =
1556 PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge");
1557 PN->addIncoming(SI.getValueOperand(), SI.getParent());
1558 Builder.SetInsertPoint(OtherStore);
1559 PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()),
1560 OtherBB);
1561 MergedVal = InsertNewInstBefore(PN, DestBB->begin());
1562 PN->setDebugLoc(MergedLoc);
1563 }
1564
1565 // Advance to a place where it is safe to insert the new store and insert it.
1566 BBI = DestBB->getFirstInsertionPt();
1567 StoreInst *NewSI =
1568 new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(),
1569 SI.getOrdering(), SI.getSyncScopeID());
1570 InsertNewInstBefore(NewSI, BBI);
1571 NewSI->setDebugLoc(MergedLoc);
1572 NewSI->mergeDIAssignID({&SI, OtherStore});
1573
1574 // If the two stores had AA tags, merge them.
1575 AAMDNodes AATags = SI.getAAMetadata();
1576 if (AATags)
1577 NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));
1578
1579 // Nuke the old stores.
1580 eraseInstFromFunction(SI);
1581 eraseInstFromFunction(*OtherStore);
1582 return true;
1583}