LLVM 23.0.0git
InstCombineLoadStoreAlloca.cpp
1//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for load, store and alloca.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/MapVector.h"
16#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/Loads.h"
19#include "llvm/IR/DataLayout.h"
21#include "llvm/IR/LLVMContext.h"
25using namespace llvm;
26using namespace PatternMatch;
27
28#define DEBUG_TYPE "instcombine"
29
30STATISTIC(NumDeadStore, "Number of dead stores eliminated");
31STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
32
34 "instcombine-max-copied-from-constant-users", cl::init(300),
35 cl::desc("Maximum users to visit in copy from constant transform"),
37
38/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
39/// pointer to an alloca. Ignore any reads of the pointer; return false if we
40/// see any stores or other unknown uses. If we see pointer arithmetic, keep
41/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
42/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
43/// the alloca, and if the source pointer is a pointer to a constant memory
44/// location, we can optimize this.
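/// Illustrative example (the names @cst and %buf are made up):
///   %buf = alloca [32 x i8]
///   call void @llvm.memcpy.p0.p0.i64(ptr %buf, ptr @cst, i64 32, i1 false)
///   %v = load i8, ptr %buf
/// Here the alloca is written only by the copy from the constant global @cst,
/// so reads of %buf can later be redirected to read @cst directly.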
45static bool
46isOnlyCopiedFromConstantMemory(AAResults *AA, Value *V,
47 MemTransferInst *&TheCopy,
48 SmallVectorImpl<Instruction *> &ToDelete) {
49 // We track lifetime intrinsics as we encounter them. If we decide to go
50 // ahead and replace the value with the memory location, this lets the caller
51 // quickly eliminate the markers.
52
53 using ValueAndIsOffset = PointerIntPair<Value *, 1, bool>;
54 SmallVector<ValueAndIsOffset, 32> Worklist;
55 SmallPtrSet<ValueAndIsOffset, 32> Visited;
56 Worklist.emplace_back(V, false);
57 while (!Worklist.empty()) {
58 ValueAndIsOffset Elem = Worklist.pop_back_val();
59 if (!Visited.insert(Elem).second)
60 continue;
61 if (Visited.size() > MaxCopiedFromConstantUsers)
62 return false;
63
64 const auto [Value, IsOffset] = Elem;
65 for (auto &U : Value->uses()) {
66 auto *I = cast<Instruction>(U.getUser());
67
68 if (auto *LI = dyn_cast<LoadInst>(I)) {
69 // Simple (non-volatile, non-atomic) loads are always ok; just ignore them.
70 if (!LI->isSimple()) return false;
71 continue;
72 }
73
74 if (isa<PHINode>(I)) {
75 // We set IsOffset=true, to forbid the memcpy from occurring after the
76 // phi: If one of the phi operands is not based on the alloca, we
77 // would incorrectly omit a write.
78 Worklist.emplace_back(I, true);
79 continue;
80 }
81 if (isa<BitCastInst, AddrSpaceCastInst>(I)) {
82 // If uses of the bitcast are ok, we are ok.
83 Worklist.emplace_back(I, IsOffset);
84 continue;
85 }
86 if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
87 // If the GEP has all zero indices, it doesn't offset the pointer. If it
88 // doesn't, it does.
89 Worklist.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
90 continue;
91 }
92
93 if (auto *Call = dyn_cast<CallBase>(I)) {
94 // If this is the function being called then we treat it like a load and
95 // ignore it.
96 if (Call->isCallee(&U))
97 continue;
98
99 unsigned DataOpNo = Call->getDataOperandNo(&U);
100 bool IsArgOperand = Call->isArgOperand(&U);
101
102 // Inalloca arguments are clobbered by the call.
103 if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
104 return false;
105
106 // If this call site doesn't modify the memory, then we know it is just
107 // a load (but one that potentially returns the value itself), so we can
108 // ignore it if we know that the value isn't captured.
109 bool NoCapture = Call->doesNotCapture(DataOpNo);
110 if (NoCapture &&
111 (Call->onlyReadsMemory() || Call->onlyReadsMemory(DataOpNo)))
112 continue;
113 }
114
115 // Lifetime intrinsics can be handled by the caller.
116 if (I->isLifetimeStartOrEnd()) {
117 assert(I->use_empty() && "Lifetime markers have no result to use!");
118 ToDelete.push_back(I);
119 continue;
120 }
121
122 // If this isn't our memcpy/memmove, reject it as something we can't
123 // handle.
124 MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
125 if (!MI)
126 return false;
127
128 // If the transfer is volatile, reject it.
129 if (MI->isVolatile())
130 return false;
131
132 // If the transfer is using the alloca as a source of the transfer, then
133 // ignore it since it is a load (unless the transfer is volatile).
134 if (U.getOperandNo() == 1)
135 continue;
136
137 // If we already have seen a copy, reject the second one.
138 if (TheCopy) return false;
139
140 // If the pointer has been offset from the start of the alloca, we can't
141 // safely handle this.
142 if (IsOffset) return false;
143
144 // If the memintrinsic isn't using the alloca as the dest, reject it.
145 if (U.getOperandNo() != 0) return false;
146
147 // If the source of the memcpy/move is not constant, reject it.
148 if (isModSet(AA->getModRefInfoMask(MI->getSource())))
149 return false;
150
151 // Otherwise, the transform is safe. Remember the copy instruction.
152 TheCopy = MI;
153 }
154 }
155 return true;
156}
157
158/// isOnlyCopiedFromConstantMemory - Return the copy instruction if the
159/// specified alloca is only modified by a copy from a constant memory
160/// location, and null otherwise. If we can prove this, we can replace any
161/// uses of the alloca with uses of the memory location directly.
162static MemTransferInst *
163isOnlyCopiedFromConstantMemory(AAResults *AA,
164 AllocaInst *AI,
165 SmallVectorImpl<Instruction *> &ToDelete) {
166 MemTransferInst *TheCopy = nullptr;
167 if (isOnlyCopiedFromConstantMemory(AA, AI, TheCopy, ToDelete))
168 return TheCopy;
169 return nullptr;
170}
171
172/// Returns true if V is dereferenceable for the size of the alloca.
173static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
174 const DataLayout &DL) {
175 if (AI->isArrayAllocation())
176 return false;
177 uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
178 if (!AllocaSize)
179 return false;
180 return isDereferenceableAndAlignedPointer(V, AI->getAlign(),
181 APInt(64, AllocaSize), DL);
182}
183
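// Illustrative effect of the canonicalizations below (made-up IR):
//   %p = alloca i32, i32 8   ; becomes:  %p = alloca [8 x i32]
//   %q = alloca i64, i8 %n   ; %n is first cast (zext) to the pointer index
//                            ; type so that any casting is exposed early.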
184static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
185 AllocaInst &AI, DominatorTree &DT) {
186 // Check for array size of 1 (scalar allocation).
187 if (!AI.isArrayAllocation()) {
188 // i32 1 is the canonical array size for scalar allocations.
189 if (AI.getArraySize()->getType()->isIntegerTy(32))
190 return nullptr;
191
192 // Canonicalize it.
193 return IC.replaceOperand(AI, 0, IC.Builder.getInt32(1));
194 }
195
196 // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
197 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
198 if (C->getValue().getActiveBits() <= 64) {
199 Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
200 AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
201 nullptr, AI.getName());
202 New->setAlignment(AI.getAlign());
203 New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
204
205 replaceAllDbgUsesWith(AI, *New, *New, DT);
206 return IC.replaceInstUsesWith(AI, New);
207 }
208 }
209
212
213 // Ensure that the alloca array size argument has type equal to the offset
214 // size of the alloca() pointer, which, in the typical case, is intptr_t,
215 // so that any casting is exposed early.
216 Type *PtrIdxTy = IC.getDataLayout().getIndexType(AI.getType());
217 if (AI.getArraySize()->getType() != PtrIdxTy) {
218 Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), PtrIdxTy, false);
219 return IC.replaceOperand(AI, 0, V);
220 }
221
222 return nullptr;
223}
224
225namespace {
226// If I and V are pointers in different address spaces, it is not allowed to
227// use replaceAllUsesWith since I and V have different types. A
228// non-target-specific transformation should not use addrspacecast on V since
229// the two address spaces may be disjoint depending on the target.
230//
231// This class chases down uses of the old pointer until reaching the load
232// instructions, then replaces the old pointer in the load instructions with
233// the new pointer. If during the chasing it sees a bitcast or GEP, it
234// creates a new bitcast or GEP with the new pointer and uses it in the load
235// instruction.
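//
// Illustrative use (made-up IR): when an alloca in addrspace(5) turns out to
// be filled from a constant global in addrspace(4), e.g.
//   %tmp = alloca i32, addrspace(5)
//   call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %tmp,
//                                    ptr addrspace(4) @g, i64 4, i1 false)
//   %v = load i32, ptr addrspace(5) %tmp
// the load is rewritten to "%v = load i32, ptr addrspace(4) @g" rather than
// introducing an addrspacecast of @g.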
236class PointerReplacer {
237public:
238 PointerReplacer(InstCombinerImpl &IC, Instruction &Root, unsigned SrcAS)
239 : IC(IC), Root(Root), FromAS(SrcAS) {}
240
241 bool collectUsers();
242 void replacePointer(Value *V);
243
244private:
245 void replace(Instruction *I);
246 Value *getReplacement(Value *V) const { return WorkMap.lookup(V); }
247 bool isAvailable(Instruction *I) const {
248 return I == &Root || UsersToReplace.contains(I);
249 }
250
251 bool isEqualOrValidAddrSpaceCast(const Instruction *I,
252 unsigned FromAS) const {
253 const auto *ASC = dyn_cast<AddrSpaceCastInst>(I);
254 if (!ASC)
255 return false;
256 unsigned ToAS = ASC->getDestAddressSpace();
257 return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
258 }
259
260 SmallSetVector<Instruction *, 32> UsersToReplace;
261 MapVector<Value *, Value *> WorkMap;
262 InstCombinerImpl &IC;
263 Instruction &Root;
264 unsigned FromAS;
265};
266} // end anonymous namespace
267
268bool PointerReplacer::collectUsers() {
269 SmallVector<Instruction *> Worklist;
270 SmallSetVector<Instruction *, 32> ValuesToRevisit;
271
272 auto PushUsersToWorklist = [&](Instruction *Inst) {
273 for (auto *U : Inst->users())
274 if (auto *I = dyn_cast<Instruction>(U))
275 if (!isAvailable(I) && !ValuesToRevisit.contains(I))
276 Worklist.emplace_back(I);
277 };
278
279 auto TryPushInstOperand = [&](Instruction *InstOp) {
280 if (!UsersToReplace.contains(InstOp)) {
281 if (!ValuesToRevisit.insert(InstOp))
282 return false;
283 Worklist.emplace_back(InstOp);
284 }
285 return true;
286 };
287
288 PushUsersToWorklist(&Root);
289 while (!Worklist.empty()) {
290 Instruction *Inst = Worklist.pop_back_val();
291 if (auto *Load = dyn_cast<LoadInst>(Inst)) {
292 if (Load->isVolatile())
293 return false;
294 UsersToReplace.insert(Load);
295 } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
296 /// TODO: Handle poison and null pointers for PHI and select.
297 // If all incoming values are available, mark this PHI as
298 // replaceable and push its users into the worklist.
299 bool IsReplaceable = all_of(PHI->incoming_values(),
300 [](Value *V) { return isa<Instruction>(V); });
301 if (IsReplaceable && all_of(PHI->incoming_values(), [&](Value *V) {
302 return isAvailable(cast<Instruction>(V));
303 })) {
304 UsersToReplace.insert(PHI);
305 PushUsersToWorklist(PHI);
306 continue;
307 }
308
309 // Either an incoming value is not an instruction or not all
310 // incoming values are available. If this PHI was already
311 // visited prior to this iteration, return false.
312 if (!IsReplaceable || !ValuesToRevisit.insert(PHI))
313 return false;
314
315 // Push PHI back into the stack, followed by unavailable
316 // incoming values.
317 Worklist.emplace_back(PHI);
318 for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx) {
319 if (!TryPushInstOperand(cast<Instruction>(PHI->getIncomingValue(Idx))))
320 return false;
321 }
322 } else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
323 auto *TrueInst = dyn_cast<Instruction>(SI->getTrueValue());
324 auto *FalseInst = dyn_cast<Instruction>(SI->getFalseValue());
325 if (!TrueInst || !FalseInst)
326 return false;
327
328 if (isAvailable(TrueInst) && isAvailable(FalseInst)) {
329 UsersToReplace.insert(SI);
330 PushUsersToWorklist(SI);
331 continue;
332 }
333
334 // Push select back onto the stack, followed by unavailable true/false
335 // value.
336 Worklist.emplace_back(SI);
337 if (!TryPushInstOperand(TrueInst) || !TryPushInstOperand(FalseInst))
338 return false;
339 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
340 auto *PtrOp = dyn_cast<Instruction>(GEP->getPointerOperand());
341 if (!PtrOp)
342 return false;
343 if (isAvailable(PtrOp)) {
344 UsersToReplace.insert(GEP);
345 PushUsersToWorklist(GEP);
346 continue;
347 }
348
349 Worklist.emplace_back(GEP);
350 if (!TryPushInstOperand(PtrOp))
351 return false;
352 } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
353 if (MI->isVolatile())
354 return false;
355 UsersToReplace.insert(Inst);
356 } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
357 UsersToReplace.insert(Inst);
358 PushUsersToWorklist(Inst);
359 } else if (Inst->isLifetimeStartOrEnd()) {
360 continue;
361 } else {
362 // TODO: For arbitrary uses with address space mismatches, should we check
363 // if we can introduce a valid addrspacecast?
364 LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *Inst << '\n');
365 return false;
366 }
367 }
368
369 return true;
370}
371
372void PointerReplacer::replacePointer(Value *V) {
373 assert(cast<PointerType>(Root.getType()) != cast<PointerType>(V->getType()) &&
374 "Invalid usage");
375 WorkMap[&Root] = V;
376 SmallVector<Instruction *> Worklist;
377 SetVector<Instruction *> PostOrderWorklist;
378 SmallPtrSet<Instruction *, 32> Visited;
379
380 // Perform a postorder traversal of the users of Root.
381 Worklist.push_back(&Root);
382 while (!Worklist.empty()) {
383 Instruction *I = Worklist.back();
384
385 // If I has not been processed before, push each of its
386 // replaceable users into the worklist.
387 if (Visited.insert(I).second) {
388 for (auto *U : I->users()) {
389 auto *UserInst = cast<Instruction>(U);
390 if (UsersToReplace.contains(UserInst) && !Visited.contains(UserInst))
391 Worklist.push_back(UserInst);
392 }
393 // Otherwise, users of I have already been pushed into
394 // the PostOrderWorklist. Push I as well.
395 } else {
396 PostOrderWorklist.insert(I);
397 Worklist.pop_back();
398 }
399 }
400
401 // Replace pointers in reverse-postorder.
402 for (Instruction *I : reverse(PostOrderWorklist))
403 replace(I);
404}
405
406void PointerReplacer::replace(Instruction *I) {
407 if (getReplacement(I))
408 return;
409
410 if (auto *LT = dyn_cast<LoadInst>(I)) {
411 auto *V = getReplacement(LT->getPointerOperand());
412 assert(V && "Operand not replaced");
413 auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
414 LT->getAlign(), LT->getOrdering(),
415 LT->getSyncScopeID());
416 NewI->takeName(LT);
417 copyMetadataForLoad(*NewI, *LT);
418
419 IC.InsertNewInstWith(NewI, LT->getIterator());
420 IC.replaceInstUsesWith(*LT, NewI);
421 // LT has actually been replaced by NewI. It is useless to insert LT into
422 // the map. Instead, we insert NewI into the map to indicate this is the
423 // replacement (new value).
424 WorkMap[NewI] = NewI;
425 } else if (auto *PHI = dyn_cast<PHINode>(I)) {
426 // Create a new PHI by replacing any incoming value that is a user of the
427 // root pointer and has a replacement.
428 Value *V = WorkMap.lookup(PHI->getIncomingValue(0));
429 PHI->mutateType(V ? V->getType() : PHI->getIncomingValue(0)->getType());
430 for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I) {
431 Value *V = WorkMap.lookup(PHI->getIncomingValue(I));
432 PHI->setIncomingValue(I, V ? V : PHI->getIncomingValue(I));
433 }
434 WorkMap[PHI] = PHI;
435 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
436 auto *V = getReplacement(GEP->getPointerOperand());
437 assert(V && "Operand not replaced");
438 SmallVector<Value *, 8> Indices(GEP->indices());
439 auto *NewI =
440 GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
441 IC.InsertNewInstWith(NewI, GEP->getIterator());
442 NewI->takeName(GEP);
443 NewI->setNoWrapFlags(GEP->getNoWrapFlags());
444 WorkMap[GEP] = NewI;
445 } else if (auto *SI = dyn_cast<SelectInst>(I)) {
446 Value *TrueValue = SI->getTrueValue();
447 Value *FalseValue = SI->getFalseValue();
448 if (Value *Replacement = getReplacement(TrueValue))
449 TrueValue = Replacement;
450 if (Value *Replacement = getReplacement(FalseValue))
451 FalseValue = Replacement;
452 auto *NewSI = SelectInst::Create(SI->getCondition(), TrueValue, FalseValue,
453 SI->getName(), nullptr, SI);
454 IC.InsertNewInstWith(NewSI, SI->getIterator());
455 NewSI->takeName(SI);
456 WorkMap[SI] = NewSI;
457 } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
458 auto *DestV = MemCpy->getRawDest();
459 auto *SrcV = MemCpy->getRawSource();
460
461 if (auto *DestReplace = getReplacement(DestV))
462 DestV = DestReplace;
463 if (auto *SrcReplace = getReplacement(SrcV))
464 SrcV = SrcReplace;
465
466 IC.Builder.SetInsertPoint(MemCpy);
467 auto *NewI = IC.Builder.CreateMemTransferInst(
468 MemCpy->getIntrinsicID(), DestV, MemCpy->getDestAlign(), SrcV,
469 MemCpy->getSourceAlign(), MemCpy->getLength(), MemCpy->isVolatile());
470 AAMDNodes AAMD = MemCpy->getAAMetadata();
471 if (AAMD)
472 NewI->setAAMetadata(AAMD);
473
474 IC.eraseInstFromFunction(*MemCpy);
475 WorkMap[MemCpy] = NewI;
476 } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(I)) {
477 auto *V = getReplacement(ASC->getPointerOperand());
478 assert(V && "Operand not replaced");
479 assert(isEqualOrValidAddrSpaceCast(
480 ASC, V->getType()->getPointerAddressSpace()) &&
481 "Invalid address space cast!");
482
483 if (V->getType()->getPointerAddressSpace() !=
484 ASC->getType()->getPointerAddressSpace()) {
485 auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), "");
486 NewI->takeName(ASC);
487 IC.InsertNewInstWith(NewI, ASC->getIterator());
488 WorkMap[ASC] = NewI;
489 } else {
490 WorkMap[ASC] = V;
491 }
492
493 } else {
494 llvm_unreachable("should never reach here");
495 }
496}
497
498Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
499 if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
500 return I;
501
502 if (AI.getAllocatedType()->isSized()) {
503 // Move all allocas of zero byte objects to the entry block and merge them
504 // together. Note that we only do this for allocas, because malloc should
505 // allocate and return a unique pointer, even for a zero byte allocation.
506 if (DL.getTypeAllocSize(AI.getAllocatedType()).getKnownMinValue() == 0) {
507 // For a zero sized alloca there is no point in doing an array allocation.
508 // This is helpful if the array size is a complicated expression not used
509 // elsewhere.
510 if (AI.isArrayAllocation())
511 return replaceOperand(AI, 0,
512 ConstantInt::get(AI.getArraySize()->getType(), 1));
513
514 // Get the first instruction in the entry block.
515 BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
516 BasicBlock::iterator FirstInst = EntryBlock.getFirstNonPHIOrDbg();
517 if (&*FirstInst != &AI) {
518 // If the entry block doesn't start with a zero-size alloca then move
519 // this one to the start of the entry block. There is no problem with
520 // dominance as the array size was forced to a constant earlier already.
521 AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
522 if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
523 DL.getTypeAllocSize(EntryAI->getAllocatedType())
524 .getKnownMinValue() != 0) {
525 AI.moveBefore(FirstInst);
526 return &AI;
527 }
528
529 // Replace this zero-sized alloca with the one at the start of the entry
530 // block after ensuring that the address will be aligned enough for both
531 // types.
532 const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
533 EntryAI->setAlignment(MaxAlign);
534 return replaceInstUsesWith(AI, EntryAI);
535 }
536 }
537 }
538
539 // Check to see if this allocation is only modified by a memcpy/memmove from
540 // a memory location whose alignment is equal to or exceeds that of the
541 // allocation. If this is the case, we can change all users to use the
542 // constant memory location instead. This is commonly produced by the CFE by
543 // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
544 // is only subsequently read.
545 SmallVector<Instruction *, 4> ToDelete;
546 if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
547 Value *TheSrc = Copy->getSource();
548 Align AllocaAlign = AI.getAlign();
549 Align SourceAlign = getOrEnforceKnownAlignment(
550 TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
551 if (AllocaAlign <= SourceAlign &&
552 isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
553 !isa<Instruction>(TheSrc)) {
554 // FIXME: Can we sink instructions without violating dominance when TheSrc
555 // is an instruction instead of a constant or argument?
556 LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
557 LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
558 unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
559 if (AI.getAddressSpace() == SrcAddrSpace) {
560 for (Instruction *Delete : ToDelete)
561 eraseInstFromFunction(*Delete);
562
563 Instruction *NewI = replaceInstUsesWith(AI, TheSrc);
564 eraseInstFromFunction(*Copy);
565 ++NumGlobalCopies;
566 return NewI;
567 }
568
569 PointerReplacer PtrReplacer(*this, AI, SrcAddrSpace);
570 if (PtrReplacer.collectUsers()) {
571 for (Instruction *Delete : ToDelete)
572 eraseInstFromFunction(*Delete);
573
574 PtrReplacer.replacePointer(TheSrc);
575 ++NumGlobalCopies;
576 }
577 }
578 }
579
580 // At last, use the generic allocation site handler to aggressively remove
581 // unused allocas.
582 return visitAllocSite(AI);
583}
584
585// Are we allowed to form an atomic load or store of this type?
586static bool isSupportedAtomicType(Type *Ty) {
587 return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
588}
589
590/// Helper to combine a load to a new type.
591///
592/// This just does the work of combining a load to a new type. It handles
593/// metadata, etc., and returns the new instruction. The \c NewTy should be the
594/// loaded *value* type. This will convert it to a pointer, cast the operand to
595/// that pointer type, load it, etc.
596///
597/// Note that this will create all of the instructions with whatever insert
598/// point the \c InstCombinerImpl currently is using.
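///
/// As a minimal sketch (made-up names): given "%v = load i32, ptr %p",
/// combining to float yields "%v.f = load float, ptr %p", carrying over the
/// alignment, atomicity and load metadata of the original instruction.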
599LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy,
600 const Twine &Suffix) {
601 assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
602 "can't fold an atomic load to requested type");
603
604 LoadInst *NewLoad =
605 Builder.CreateAlignedLoad(NewTy, LI.getPointerOperand(), LI.getAlign(),
606 LI.isVolatile(), LI.getName() + Suffix);
607 NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
608 copyMetadataForLoad(*NewLoad, LI);
609 return NewLoad;
610}
611
612/// Combine a store to a new type.
613///
614/// Returns the newly created store instruction.
615static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
616 Value *V) {
617 assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
618 "can't fold an atomic store of requested type");
619
620 Value *Ptr = SI.getPointerOperand();
621 SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
622 SI.getAllMetadata(MD);
623
624 StoreInst *NewStore =
625 IC.Builder.CreateAlignedStore(V, Ptr, SI.getAlign(), SI.isVolatile());
626 NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
627 for (const auto &MDPair : MD) {
628 unsigned ID = MDPair.first;
629 MDNode *N = MDPair.second;
630 // Note, essentially every kind of metadata should be preserved here! This
631 // routine is supposed to clone a store instruction changing *only its
632 // type*. The only metadata it makes sense to drop is metadata which is
633 // invalidated when the pointer type changes. This should essentially
634 // never be the case in LLVM, but we explicitly switch over only known
635 // metadata to be conservatively correct. If you are adding metadata to
636 // LLVM which pertains to stores, you almost certainly want to add it
637 // here.
638 switch (ID) {
639 case LLVMContext::MD_dbg:
640 case LLVMContext::MD_DIAssignID:
641 case LLVMContext::MD_tbaa:
642 case LLVMContext::MD_prof:
643 case LLVMContext::MD_fpmath:
644 case LLVMContext::MD_tbaa_struct:
645 case LLVMContext::MD_alias_scope:
646 case LLVMContext::MD_noalias:
647 case LLVMContext::MD_nontemporal:
648 case LLVMContext::MD_mem_parallel_loop_access:
649 case LLVMContext::MD_access_group:
650 // All of these directly apply.
651 NewStore->setMetadata(ID, N);
652 break;
653 case LLVMContext::MD_invariant_load:
654 case LLVMContext::MD_nonnull:
655 case LLVMContext::MD_noundef:
656 case LLVMContext::MD_range:
657 case LLVMContext::MD_align:
658 case LLVMContext::MD_dereferenceable:
659 case LLVMContext::MD_dereferenceable_or_null:
660 // These don't apply for stores.
661 break;
662 }
663 }
664
665 return NewStore;
666}
667
668/// Combine loads to match the type of their uses' value after looking
669/// through intervening bitcasts.
670///
671/// The core idea here is that if the result of a load is used in an operation,
672/// we should load the type most conducive to that operation. For example, when
673/// loading an integer and converting that immediately to a pointer, we should
674/// instead directly load a pointer.
675///
676/// However, this routine must never change the width of a load or the number of
677/// loads as that would introduce a semantic change. This combine is expected to
678/// be a semantic no-op which just allows loads to more closely model the types
679/// of their consuming operations.
680///
681/// Currently, we also refuse to change the precise type used for an atomic load
682/// or a volatile load. This is debatable, and might be reasonable to change
683/// later. However, it is risky in case some backend or other part of LLVM is
684/// relying on the exact type loaded to select appropriate atomic operations.
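///
/// Illustrative example (made-up names): for
///   %i = load i32, ptr %p
///   %f = bitcast i32 %i to float
/// we emit "%f = load float, ptr %p" and drop the bitcast; the width of the
/// access is unchanged, only the loaded type is.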
685static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
686 LoadInst &Load) {
687 // FIXME: We could probably with some care handle both volatile and ordered
688 // atomic loads here but it isn't clear that this is important.
689 if (!Load.isUnordered())
690 return nullptr;
691
692 if (Load.use_empty())
693 return nullptr;
694
695 // swifterror values can't be bitcasted.
696 if (Load.getPointerOperand()->isSwiftError())
697 return nullptr;
698
699 // Fold away bit casts of the loaded value by loading the desired type.
700 // Note that we should not do this for pointer<->integer casts,
701 // because that would result in type punning.
702 if (Load.hasOneUse()) {
703 // Don't transform when the type is x86_amx; this keeps the pass that
704 // lowers the x86_amx type happy.
705 Type *LoadTy = Load.getType();
706 if (auto *BC = dyn_cast<BitCastInst>(Load.user_back())) {
707 assert(!LoadTy->isX86_AMXTy() && "Load from x86_amx* should not happen!");
708 if (BC->getType()->isX86_AMXTy())
709 return nullptr;
710 }
711
712 if (auto *CastUser = dyn_cast<CastInst>(Load.user_back())) {
713 Type *DestTy = CastUser->getDestTy();
714 if (CastUser->isNoopCast(IC.getDataLayout()) &&
715 LoadTy->isPtrOrPtrVectorTy() == DestTy->isPtrOrPtrVectorTy() &&
716 (!Load.isAtomic() || isSupportedAtomicType(DestTy))) {
717 LoadInst *NewLoad = IC.combineLoadToNewType(Load, DestTy);
718 CastUser->replaceAllUsesWith(NewLoad);
719 IC.eraseInstFromFunction(*CastUser);
720 return &Load;
721 }
722 }
723 }
724
725 // FIXME: We should also canonicalize loads of vectors when their elements are
726 // cast to other types.
727 return nullptr;
728}
729
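// Illustrative effect of unpackLoadToAggregate (made-up IR): a load of a
// small, padding-free struct such as
//   %agg = load { i32, float }, ptr %p
// is split into one load per field, reassembled with insertvalue:
//   %ld0 = load i32, ptr %p
//   %p.elt = getelementptr inbounds i8, ptr %p, i64 4
//   %ld1 = load float, ptr %p.elt
//   %tmp = insertvalue { i32, float } poison, i32 %ld0, 0
//   %agg = insertvalue { i32, float } %tmp, float %ld1, 1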
730static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
731 // FIXME: We could probably with some care handle both volatile and atomic
732 // loads here but it isn't clear that this is important.
733 if (!LI.isSimple())
734 return nullptr;
735
736 Type *T = LI.getType();
737 if (!T->isAggregateType())
738 return nullptr;
739
740 StringRef Name = LI.getName();
741
742 if (auto *ST = dyn_cast<StructType>(T)) {
743 // If the struct has only one element, we unpack it.
744 auto NumElements = ST->getNumElements();
745 if (NumElements == 1) {
746 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
747 ".unpack");
748 NewLoad->setAAMetadata(LI.getAAMetadata());
749 // Copy invariant metadata from parent load.
750 NewLoad->copyMetadata(LI, LLVMContext::MD_invariant_load);
751 return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
752 PoisonValue::get(T), NewLoad, 0, Name));
753 }
754
755 // We don't want to break loads with padding here, as we'd lose
756 // the knowledge that padding exists for the rest of the pipeline.
757 const DataLayout &DL = IC.getDataLayout();
758 auto *SL = DL.getStructLayout(ST);
759
760 if (SL->hasPadding())
761 return nullptr;
762
763 const auto Align = LI.getAlign();
764 auto *Addr = LI.getPointerOperand();
765 auto *IdxType = DL.getIndexType(Addr->getType());
766
767 Value *V = PoisonValue::get(T);
768 for (unsigned i = 0; i < NumElements; i++) {
769 auto *Ptr = IC.Builder.CreateInBoundsPtrAdd(
770 Addr, IC.Builder.CreateTypeSize(IdxType, SL->getElementOffset(i)),
771 Name + ".elt");
772 auto *L = IC.Builder.CreateAlignedLoad(
773 ST->getElementType(i), Ptr,
774 commonAlignment(Align, SL->getElementOffset(i).getKnownMinValue()),
775 Name + ".unpack");
776 // Propagate AA metadata. It'll still be valid on the narrowed load.
777 L->setAAMetadata(LI.getAAMetadata());
778 // Copy invariant metadata from parent load.
779 L->copyMetadata(LI, LLVMContext::MD_invariant_load);
780 V = IC.Builder.CreateInsertValue(V, L, i);
781 }
782
783 V->setName(Name);
784 return IC.replaceInstUsesWith(LI, V);
785 }
786
787 if (auto *AT = dyn_cast<ArrayType>(T)) {
788 auto *ET = AT->getElementType();
789 auto NumElements = AT->getNumElements();
790 if (NumElements == 1) {
791 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
792 NewLoad->setAAMetadata(LI.getAAMetadata());
793 return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
794 PoisonValue::get(T), NewLoad, 0, Name));
795 }
796
797 // Bail out if the array is too large. Ideally we would like to optimize
798 // arrays of arbitrary size but this has a terrible impact on compile time.
799 // The threshold here is chosen arbitrarily, maybe needs a little bit of
800 // tuning.
801 if (NumElements > IC.MaxArraySizeForCombine)
802 return nullptr;
803
804 const DataLayout &DL = IC.getDataLayout();
805 TypeSize EltSize = DL.getTypeAllocSize(ET);
806 const auto Align = LI.getAlign();
807
808 auto *Addr = LI.getPointerOperand();
809 auto *IdxType = Type::getInt64Ty(T->getContext());
810 auto *Zero = ConstantInt::get(IdxType, 0);
811
812 Value *V = PoisonValue::get(T);
813 TypeSize Offset = TypeSize::getZero();
814 for (uint64_t i = 0; i < NumElements; i++) {
815 Value *Indices[2] = {
816 Zero,
817 ConstantInt::get(IdxType, i),
818 };
819 auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
820 Name + ".elt");
821 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
822 auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
823 EltAlign, Name + ".unpack");
824 L->setAAMetadata(LI.getAAMetadata());
825 V = IC.Builder.CreateInsertValue(V, L, i);
826 Offset += EltSize;
827 }
828
829 V->setName(Name);
830 return IC.replaceInstUsesWith(LI, V);
831 }
832
833 return nullptr;
834}
835
836// If we can determine that all possible objects pointed to by the provided
837// pointer value are, not only dereferenceable, but also definitively less than
838// or equal to the provided maximum size, then return true. Otherwise, return
839// false (only constant global values and allocas can be proven small enough here).
840//
841// FIXME: This should probably live in ValueTracking (or similar).
842static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
843 const DataLayout &DL) {
844 SmallPtrSet<Value *, 4> Visited;
845 SmallVector<Value *, 4> Worklist(1, V);
846
847 do {
848 Value *P = Worklist.pop_back_val();
849 P = P->stripPointerCasts();
850
851 if (!Visited.insert(P).second)
852 continue;
853
854 if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
855 Worklist.push_back(SI->getTrueValue());
856 Worklist.push_back(SI->getFalseValue());
857 continue;
858 }
859
860 if (PHINode *PN = dyn_cast<PHINode>(P)) {
861 append_range(Worklist, PN->incoming_values());
862 continue;
863 }
864
865 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
866 if (GA->isInterposable())
867 return false;
868 Worklist.push_back(GA->getAliasee());
869 continue;
870 }
871
872 // If we know how big this object is, and it is less than MaxSize, continue
873 // searching. Otherwise, return false.
874 if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
875 std::optional<TypeSize> AllocSize = AI->getAllocationSize(DL);
876 if (!AllocSize || AllocSize->isScalable() ||
877 AllocSize->getFixedValue() > MaxSize)
878 return false;
879 continue;
880 }
881
882 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
883 if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
884 return false;
885
886 uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
887 if (InitSize > MaxSize)
888 return false;
889 continue;
890 }
891
892 return false;
893 } while (!Worklist.empty());
894
895 return true;
896}
897
898// If we're indexing into an object of a known size, and the outer index is
899// not a constant, but having any value but zero would lead to undefined
900// behavior, replace it with zero.
901//
902// For example, if we have:
903// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
904// ...
905// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
906// ... = load i32* %arrayidx, align 4
907// Then we know that we can replace %x in the GEP with i64 0.
908//
909// FIXME: We could fold any GEP index to zero that would cause UB if it were
910// not zero. Currently, we only handle the first such index. Also, we could
911// also search through non-zero constant indices if we kept track of the
912// offsets those indices implied.
913static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
914 GetElementPtrInst *GEPI, Instruction *MemI,
915 unsigned &Idx) {
916 if (GEPI->getNumOperands() < 2)
917 return false;
918
919 // Find the first non-zero index of a GEP. If all indices are zero, return
920 // one past the last index.
921 auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
922 unsigned I = 1;
923 for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
924 Value *V = GEPI->getOperand(I);
925 if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
926 if (CI->isZero())
927 continue;
928
929 break;
930 }
931
932 return I;
933 };
934
935 // Skip through initial 'zero' indices, and find the corresponding pointer
936 // type. See if the next index is not a constant.
937 Idx = FirstNZIdx(GEPI);
938 if (Idx == GEPI->getNumOperands())
939 return false;
940 if (isa<Constant>(GEPI->getOperand(Idx)))
941 return false;
942
943 SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
944 Type *SourceElementType = GEPI->getSourceElementType();
945 // Size information about scalable vectors is not available, so we cannot
946 // deduce whether indexing at n is undefined behaviour or not. Bail out.
947 if (SourceElementType->isScalableTy())
948 return false;
949
950 Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
951 if (!AllocTy || !AllocTy->isSized())
952 return false;
953 const DataLayout &DL = IC.getDataLayout();
954 uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedValue();
955
956 // If there are more indices after the one we might replace with a zero, make
957 // sure they're all non-negative. If any of them are negative, the overall
958 // address being computed might be before the base address determined by the
959 // first non-zero index.
960 auto IsAllNonNegative = [&]() {
961 for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
962 KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), MemI);
963 if (Known.isNonNegative())
964 continue;
965 return false;
966 }
967
968 return true;
969 };
970
971 // FIXME: If the GEP is not inbounds, and there are extra indices after the
972 // one we'll replace, those could cause the address computation to wrap
973 // (rendering the IsAllNonNegative() check below insufficient). We can do
974 // better, ignoring zero indices (and other indices we can prove small
975 // enough not to wrap).
976 if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
977 return false;
978
979 // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
980 // also known to be dereferenceable.
981 return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
982 IsAllNonNegative();
983}
984
985// If we're indexing into an object with a variable index for the memory
986// access, but the object has only one element, we can assume that the index
987// will always be zero. If we replace the GEP, return it.
988static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
989 Instruction &MemI) {
990 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
991 unsigned Idx;
992 if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
993 Instruction *NewGEPI = GEPI->clone();
994 NewGEPI->setOperand(Idx,
995 ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
996 IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator());
997 return NewGEPI;
998 }
999 }
1000
1001 return nullptr;
1002}
1003
1004static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
1005 if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
1006 return false;
1007
1008 auto *Ptr = SI.getPointerOperand();
1009 if (auto *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
1010 Ptr = GEPI->getOperand(0);
1011 return (isa<ConstantPointerNull>(Ptr) &&
1012 !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
1013}
1014
1015static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
1016 if (auto *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
1017 const Value *GEPI0 = GEPI->getOperand(0);
1018 if (isa<ConstantPointerNull>(GEPI0) &&
1019 !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
1020 return true;
1021 }
1022 if (isa<UndefValue>(Op) ||
1023 (isa<ConstantPointerNull>(Op) &&
1024 !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
1025 return true;
1026 return false;
1027}
1028
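// Illustrative effect of simplifyNonNullOperand (made-up names): a pointer
// operand such as "select i1 %c, ptr null, ptr %p" feeding a load, or a store
// known to require a dereferenceable pointer, folds to plain "%p", since the
// null arm would be immediate UB in a default address space.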
1029Value *InstCombinerImpl::simplifyNonNullOperand(Value *V,
1030 bool HasDereferenceable,
1031 unsigned Depth) {
1032 if (auto *Sel = dyn_cast<SelectInst>(V)) {
1033 if (isa<ConstantPointerNull>(Sel->getOperand(1)))
1034 return Sel->getOperand(2);
1035
1036 if (isa<ConstantPointerNull>(Sel->getOperand(2)))
1037 return Sel->getOperand(1);
1038 }
1039
1040 if (!V->hasOneUse())
1041 return nullptr;
1042
1043 constexpr unsigned RecursionLimit = 3;
1044 if (Depth == RecursionLimit)
1045 return nullptr;
1046
1047 if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
1048 if (HasDereferenceable || GEP->isInBounds()) {
1049 if (auto *Res = simplifyNonNullOperand(GEP->getPointerOperand(),
1050 HasDereferenceable, Depth + 1)) {
1051 replaceOperand(*GEP, 0, Res);
1052 addToWorklist(GEP);
1053 return nullptr;
1054 }
1055 }
1056 }
1057
1058 if (auto *PHI = dyn_cast<PHINode>(V)) {
1059 bool Changed = false;
1060 for (Use &U : PHI->incoming_values()) {
1061 // We set Depth to RecursionLimit to avoid expensive recursion.
1062 if (auto *Res = simplifyNonNullOperand(U.get(), HasDereferenceable,
1063 RecursionLimit)) {
1064 replaceUse(U, Res);
1065 Changed = true;
1066 }
1067 }
1068 if (Changed)
1069 addToWorklist(PHI);
1070 return nullptr;
1071 }
1072
1073 return nullptr;
1074}
1075
1076Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
1077 Value *Op = LI.getOperand(0);
1078 if (Value *Res = simplifyLoadInst(&LI, Op, SQ.getWithInstruction(&LI)))
1079 return replaceInstUsesWith(LI, Res);
1080
1081 // Try to canonicalize the loaded type.
1082 if (Instruction *Res = combineLoadToOperationType(*this, LI))
1083 return Res;
1084
1085 // Replace GEP indices if possible.
1086 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI))
1087 return replaceOperand(LI, 0, NewGEPI);
1088
1089 if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1090 return Res;
1091
1092 // Do really simple store-to-load forwarding and load CSE, to catch cases
1093 // where there are several consecutive memory accesses to the same location,
1094 // separated by a few arithmetic operations.
1095 bool IsLoadCSE = false;
1096 BatchAAResults BatchAA(*AA);
1097 if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
1098 if (IsLoadCSE)
1099 combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
1100
1101 return replaceInstUsesWith(
1102 LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1103 LI.getName() + ".cast"));
1104 }
1105
1106 // None of the following transforms are legal for volatile/ordered atomic
1107 // loads. Most of them do apply for unordered atomics.
1108 if (!LI.isUnordered()) return nullptr;
1109
1110 // load(gep null, ...) -> unreachable
1111 // load null/undef -> unreachable
1112 // TODO: Consider a target hook for valid address spaces for this transform.
1113 if (canSimplifyNullLoadOrGEP(LI, Op)) {
1116 }
1117
1118 if (Op->hasOneUse()) {
1119 // Change select and PHI nodes to select values instead of addresses: this
1120 // helps alias analysis out a lot, allows many other simplifications, and
1121 // exposes redundancy in the code.
1122 //
1123 // Note that we cannot do the transformation unless we know that the
1124 // introduced loads cannot trap! Something like this is valid as long as
1125 // the condition is always false: load (select bool %C, int* null, int* %G),
1126 // but it would not be valid if we transformed it to load from null
1127 // unconditionally.
1128 //
1129
1130 auto *ASC = dyn_cast<AddrSpaceCastInst>(Op);
1131 Value *SelectOp = Op;
1132 if (ASC && ASC->getOperand(0)->hasOneUse())
1133 SelectOp = ASC->getOperand(0);
1134 if (SelectInst *SI = dyn_cast<SelectInst>(SelectOp)) {
1135 // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1136 // or
1137 // load (addrspacecast(select (Cond, &V1, &V2))) -->
1138 // select(Cond, load (addrspacecast(&V1)), load (addrspacecast(&V2))).
1139 Align Alignment = LI.getAlign();
1140 if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(),
1141 Alignment, DL, SI) &&
1142 isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(),
1143 Alignment, DL, SI)) {
1144
1145 auto MaybeCastedLoadOperand = [&](Value *Op) {
1146 if (ASC)
1147 return Builder.CreateAddrSpaceCast(Op, ASC->getType(),
1148 Op->getName() + ".cast");
1149 return Op;
1150 };
1151 Value *LoadOp1 = MaybeCastedLoadOperand(SI->getOperand(1));
1152 LoadInst *V1 = Builder.CreateLoad(LI.getType(), LoadOp1,
1153 LoadOp1->getName() + ".val");
1154
1155 Value *LoadOp2 = MaybeCastedLoadOperand(SI->getOperand(2));
1156 LoadInst *V2 = Builder.CreateLoad(LI.getType(), LoadOp2,
1157 LoadOp2->getName() + ".val");
1158 assert(LI.isUnordered() && "implied by above");
1159 V1->setAlignment(Alignment);
1160 V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1161 V2->setAlignment(Alignment);
1162 V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1163 // It is safe to copy any metadata that does not trigger UB. Copy any
1164 // poison-generating metadata.
1167 return SelectInst::Create(SI->getCondition(), V1, V2);
1168 }
1169 }
1170 }
1171
1172 if (!NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace()))
1173 if (Value *V = simplifyNonNullOperand(Op, /*HasDereferenceable=*/true))
1174 return replaceOperand(LI, 0, V);
1175
1176 return nullptr;
1177}
1178
1179/// Look for extractelement/insertvalue sequence that acts like a bitcast.
1180///
1181/// \returns underlying value that was "cast", or nullptr otherwise.
1182///
1183/// For example, if we have:
1184///
1185/// %E0 = extractelement <2 x double> %U, i32 0
1186/// %V0 = insertvalue [2 x double] undef, double %E0, 0
1187/// %E1 = extractelement <2 x double> %U, i32 1
1188/// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1189///
1190/// and the layout of a <2 x double> is isomorphic to a [2 x double],
1191/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1192/// Note that %U may contain non-undef values where %V1 has undef.
1193static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) {
1194 Value *U = nullptr;
1195 while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1196 auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1197 if (!E)
1198 return nullptr;
1199 auto *W = E->getVectorOperand();
1200 if (!U)
1201 U = W;
1202 else if (U != W)
1203 return nullptr;
1204 auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1205 if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1206 return nullptr;
1207 V = IV->getAggregateOperand();
1208 }
1209 if (!match(V, m_Undef()) || !U)
1210 return nullptr;
1211
1212 auto *UT = cast<VectorType>(U->getType());
1213 auto *VT = V->getType();
1214 // Check that types UT and VT are bitwise isomorphic.
1215 const auto &DL = IC.getDataLayout();
1216 if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1217 return nullptr;
1218 }
1219 if (auto *AT = dyn_cast<ArrayType>(VT)) {
1220 if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1221 return nullptr;
1222 } else {
1223 auto *ST = cast<StructType>(VT);
1224 if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1225 return nullptr;
1226 for (const auto *EltT : ST->elements()) {
1227 if (EltT != UT->getElementType())
1228 return nullptr;
1229 }
1230 }
1231 return U;
1232}
1233
1234/// Combine stores to match the type of value being stored.
1235///
1236/// The core idea here is that the memory does not have any intrinsic type and
1237/// where we can we should match the type of a store to the type of value being
1238/// stored.
1239///
1240/// However, this routine must never change the width of a store or the number of
1241/// stores as that would introduce a semantic change. This combine is expected to
1242/// be a semantic no-op which just allows stores to more closely model the types
1243/// of their incoming values.
1244///
1245/// Currently, we also refuse to change the precise type used for an atomic or
1246/// volatile store. This is debatable, and might be reasonable to change later.
1247/// However, it is risky in case some backend or other part of LLVM is relying
1248/// on the exact type stored to select appropriate atomic operations.
1249///
1250/// \returns true if the store was successfully combined away. This indicates
1251/// the caller must erase the store instruction. We have to let the caller erase
1252/// the store instruction as otherwise there is no way to signal whether it was
1253/// combined or not: IC.EraseInstFromFunction returns a null pointer.
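///
/// Illustrative example (made-up names): for
///   %i = bitcast float %f to i32
///   store i32 %i, ptr %p
/// we emit "store float %f, ptr %p" instead; the store width stays the same,
/// only the stored type changes.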
1254static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
1255 // FIXME: We could probably with some care handle both volatile and ordered
1256 // atomic stores here but it isn't clear that this is important.
1257 if (!SI.isUnordered())
1258 return false;
1259
1260 // swifterror values can't be bitcasted.
1261 if (SI.getPointerOperand()->isSwiftError())
1262 return false;
1263
1264 Value *V = SI.getValueOperand();
1265
1266 // Fold away bit casts of the stored value by storing the original type.
1267 if (auto *BC = dyn_cast<BitCastInst>(V)) {
1268 assert(!BC->getType()->isX86_AMXTy() &&
1269 "store to x86_amx* should not happen!");
1270 V = BC->getOperand(0);
1271 // Don't transform when the type is x86_amx; this keeps the pass that
1272 // lowers the x86_amx type happy.
1273 if (V->getType()->isX86_AMXTy())
1274 return false;
1275 if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1276 combineStoreToNewValue(IC, SI, V);
1277 return true;
1278 }
1279 }
1280
1281 if (Value *U = likeBitCastFromVector(IC, V))
1282 if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1283 combineStoreToNewValue(IC, SI, U);
1284 return true;
1285 }
1286
1287 // FIXME: We should also canonicalize stores of vectors when their elements
1288 // are cast to other types.
1289 return false;
1290}
1291
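// Illustrative effect of unpackStoreToAggregate (made-up IR): storing a
// small, padding-free struct
//   store { i32, i32 } %agg, ptr %p
// becomes one store per field:
//   %agg.elt = extractvalue { i32, i32 } %agg, 0
//   store i32 %agg.elt, ptr %p
//   %p.repack = getelementptr inbounds i8, ptr %p, i64 4
//   %agg.elt1 = extractvalue { i32, i32 } %agg, 1
//   store i32 %agg.elt1, ptr %p.repack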
1292static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
1293 // FIXME: We could probably with some care handle both volatile and atomic
1294 // stores here but it isn't clear that this is important.
1295 if (!SI.isSimple())
1296 return false;
1297
1298 Value *V = SI.getValueOperand();
1299 Type *T = V->getType();
1300
1301 if (!T->isAggregateType())
1302 return false;
1303
1304 if (auto *ST = dyn_cast<StructType>(T)) {
1305 // If the struct has only one element, we unpack it.
1306 unsigned Count = ST->getNumElements();
1307 if (Count == 1) {
1308 V = IC.Builder.CreateExtractValue(V, 0);
1309 combineStoreToNewValue(IC, SI, V);
1310 return true;
1311 }
1312
1313 // We don't want to break stores with padding here, as we'd lose
1314 // the knowledge that padding exists for the rest of the pipeline.
1315 const DataLayout &DL = IC.getDataLayout();
1316 auto *SL = DL.getStructLayout(ST);
1317
1318 if (SL->hasPadding())
1319 return false;
1320
1321 const auto Align = SI.getAlign();
1322
1323 SmallString<16> EltName = V->getName();
1324 EltName += ".elt";
1325 auto *Addr = SI.getPointerOperand();
1326 SmallString<16> AddrName = Addr->getName();
1327 AddrName += ".repack";
1328
1329 auto *IdxType = DL.getIndexType(Addr->getType());
1330 for (unsigned i = 0; i < Count; i++) {
1331 auto *Ptr = IC.Builder.CreateInBoundsPtrAdd(
1332 Addr, IC.Builder.CreateTypeSize(IdxType, SL->getElementOffset(i)),
1333 AddrName);
1334 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1335 auto EltAlign =
1336 commonAlignment(Align, SL->getElementOffset(i).getKnownMinValue());
1337 llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1338 NS->setAAMetadata(SI.getAAMetadata());
1339 }
1340
1341 return true;
1342 }
1343
1344 if (auto *AT = dyn_cast<ArrayType>(T)) {
1345 // If the array has only one element, we unpack it.
1346 auto NumElements = AT->getNumElements();
1347 if (NumElements == 1) {
1348 V = IC.Builder.CreateExtractValue(V, 0);
1349 combineStoreToNewValue(IC, SI, V);
1350 return true;
1351 }
1352
1353 // Bail out if the array is too large. Ideally we would like to optimize
1354 // arrays of arbitrary size but this has a terrible impact on compile time.
1355 // The threshold here is chosen arbitrarily, maybe needs a little bit of
1356 // tuning.
1357 if (NumElements > IC.MaxArraySizeForCombine)
1358 return false;
1359
1360 const DataLayout &DL = IC.getDataLayout();
1361 TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
1362 const auto Align = SI.getAlign();
1363
1364 SmallString<16> EltName = V->getName();
1365 EltName += ".elt";
1366 auto *Addr = SI.getPointerOperand();
1367 SmallString<16> AddrName = Addr->getName();
1368 AddrName += ".repack";
1369
1370 auto *IdxType = Type::getInt64Ty(T->getContext());
1371 auto *Zero = ConstantInt::get(IdxType, 0);
1372
1373 TypeSize Offset = TypeSize::getZero();
1374 for (uint64_t i = 0; i < NumElements; i++) {
1375 Value *Indices[2] = {
1376 Zero,
1377 ConstantInt::get(IdxType, i),
1378 };
1379 auto *Ptr =
1380 IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
1381 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1382 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
1383 Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1384 NS->setAAMetadata(SI.getAAMetadata());
1385 Offset += EltSize;
1386 }
1387
1388 return true;
1389 }
1390
1391 return false;
1392}
1393
1394/// equivalentAddressValues - Test if A and B will obviously have the same
1395/// value. This includes recognizing that %t0 and %t1 will have the same
1396/// value in code like this:
1397/// %t0 = getelementptr \@a, 0, 3
1398/// store i32 0, i32* %t0
1399/// %t1 = getelementptr \@a, 0, 3
1400/// %t2 = load i32* %t1
1401///
1402static bool equivalentAddressValues(Value *A, Value *B) {
1403 // Test if the values are trivially equivalent.
1404 if (A == B) return true;
1405
1406 // Test if the values come from identical arithmetic instructions.
1407 // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1408 // it's only used to compare two uses within the same basic block, which
1409 // means that they'll always either have the same value or one of them
1410 // will have an undefined value.
1411 if (isa<BinaryOperator>(A) ||
1412 isa<CastInst>(A) ||
1413 isa<PHINode>(A) ||
1414 isa<GetElementPtrInst>(A))
1415 if (Instruction *BI = dyn_cast<Instruction>(B))
1416 if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1417 return true;
1418
1419 // Otherwise they may not be equivalent.
1420 return false;
1421}
1422
1423Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
1424 Value *Val = SI.getOperand(0);
1425 Value *Ptr = SI.getOperand(1);
1426
1427 // Try to canonicalize the stored type.
1428 if (combineStoreToValueType(*this, SI))
1429 return eraseInstFromFunction(SI);
1430
1431 // Try to unpack an aggregate store into per-element stores.
1432 if (unpackStoreToAggregate(*this, SI))
1433 return eraseInstFromFunction(SI);
1434
1435 // Replace GEP indices if possible.
1436 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI))
1437 return replaceOperand(SI, 1, NewGEPI);
1438
1439 // Don't hack volatile/ordered stores.
1440 // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1441 if (!SI.isUnordered()) return nullptr;
1442
1443 // If the RHS is an alloca with a single use, zapify the store, making the
1444 // alloca dead.
1445 if (Ptr->hasOneUse()) {
1446 if (isa<AllocaInst>(Ptr))
1447 return eraseInstFromFunction(SI);
1448 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1449 if (isa<AllocaInst>(GEP->getOperand(0))) {
1450 if (GEP->getOperand(0)->hasOneUse())
1451 return eraseInstFromFunction(SI);
1452 }
1453 }
1454 }
1455
1456 // If we have a store to a location which is known constant, we can conclude
1457 // that the store must be storing the constant value (else the memory
1458 // wouldn't be constant), and this must be a noop.
1459 if (!isModSet(AA->getModRefInfoMask(Ptr)))
1460 return eraseInstFromFunction(SI);
1461
1462 // Do really simple DSE, to catch cases where there are several consecutive
1463 // stores to the same location, separated by a few arithmetic operations. This
1464 // situation often occurs with bitfield accesses.
1465 BasicBlock::iterator BBI(SI);
1466 for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1467 --ScanInsts) {
1468 --BBI;
1469 // Don't count debug info directives, lest they affect codegen,
1470 // and we skip pointer-to-pointer bitcasts, which are NOPs.
1471 if (BBI->isDebugOrPseudoInst()) {
1472 ScanInsts++;
1473 continue;
1474 }
1475
1476 if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1477 // Prev store isn't volatile, and stores to the same location?
1478 if (PrevSI->isUnordered() &&
1479 equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1)) &&
1480 PrevSI->getValueOperand()->getType() ==
1481 SI.getValueOperand()->getType()) {
1482 ++NumDeadStore;
1483 // Manually add back the original store to the worklist now, so it will
1484 // be processed after the operands of the removed store, as this may
1485 // expose additional DSE opportunities.
1486 Worklist.push(&SI);
1487 eraseInstFromFunction(*PrevSI);
1488 return nullptr;
1489 }
1490 break;
1491 }
1492
1493 // If this is a load, we have to stop. However, if the loaded value is from
1494 // the pointer we're loading and is producing the pointer we're storing,
1495 // then *this* store is dead (X = load P; store X -> P).
1496 if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1497 if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1498 assert(SI.isUnordered() && "can't eliminate ordering operation");
1499 return eraseInstFromFunction(SI);
1500 }
1501
1502 // Otherwise, this is a load from some other location. Stores before it
1503 // may not be dead.
1504 break;
1505 }
1506
1507 // Don't skip over loads, throws or things that can modify memory.
1508 if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1509 break;
1510 }
1511
1512 // store X, null -> turns into 'unreachable' in SimplifyCFG
1513 // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1514 if (canSimplifyNullStoreOrGEP(SI)) {
1515 if (!isa<PoisonValue>(Val))
1516 return replaceOperand(SI, 0, PoisonValue::get(Val->getType()));
1517 return nullptr; // Do not modify these!
1518 }
1519
1520 // This is a non-terminator unreachable marker. Don't remove it.
1521 if (isa<UndefValue>(Ptr)) {
1522 // Remove guaranteed-to-transfer instructions before the marker.
1523 if (removeInstructionsBeforeUnreachable(SI))
1524 return &SI;
1525 // Remove all instructions after the marker and handle dead blocks this
1526 // implies.
1527 SmallVector<BasicBlock *> Worklist;
1528 handleUnreachableFrom(SI.getNextNode(), Worklist);
1529 handlePotentiallyDeadBlocks(Worklist);
1530 return nullptr;
1531 }
1532
1533 // store undef, Ptr -> noop
1534 // FIXME: This is technically incorrect because it might overwrite a poison
1535 // value. Change to PoisonValue once #52930 is resolved.
1536 if (isa<UndefValue>(Val))
1537 return eraseInstFromFunction(SI);
1538
1539 if (!NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
1540 if (Value *V = simplifyNonNullOperand(Ptr, /*HasDereferenceable=*/true))
1541 return replaceOperand(SI, 1, V);
1542
1543 return nullptr;
1544}
1545
1546/// Try to transform:
1547/// if () { *P = v1; } else { *P = v2 }
1548/// or:
1549/// *P = v1; if () { *P = v2; }
1550/// into a phi node with a store in the successor.
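///
/// Illustrative example (made-up labels): with
///   then:  store i32 1, ptr %p
///          br label %end
///   else:  store i32 2, ptr %p
///          br label %end
/// the two stores are replaced by a single store in %end of
///   %storemerge = phi i32 [ 1, %then ], [ 2, %else ]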
1551bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
1552 if (!SI.isUnordered())
1553 return false; // This code has not been audited for volatile/ordered case.
1554
1555 // Check if the successor block has exactly 2 incoming edges.
1556 BasicBlock *StoreBB = SI.getParent();
1557 BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1558 if (!DestBB->hasNPredecessors(2))
1559 return false;
1560
1561 // Capture the other block (the block that doesn't contain our store).
1562 pred_iterator PredIter = pred_begin(DestBB);
1563 if (*PredIter == StoreBB)
1564 ++PredIter;
1565 BasicBlock *OtherBB = *PredIter;
1566
1567 // Bail out if all of the relevant blocks aren't distinct. This can happen,
1568 // for example, if SI is in an infinite loop.
1569 if (StoreBB == DestBB || OtherBB == DestBB)
1570 return false;
1571
1572 // Verify that the other block ends in a branch and is not otherwise empty.
1573 BasicBlock::iterator BBI(OtherBB->getTerminator());
1574 BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1575 if (!OtherBr || BBI == OtherBB->begin())
1576 return false;
1577
1578 auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool {
1579 if (!OtherStore ||
1580 OtherStore->getPointerOperand() != SI.getPointerOperand())
1581 return false;
1582
1583 auto *SIVTy = SI.getValueOperand()->getType();
1584 auto *OSVTy = OtherStore->getValueOperand()->getType();
1585 return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) &&
1586 SI.hasSameSpecialState(OtherStore);
1587 };
1588
1589 // If the other block ends in an unconditional branch, check for the 'if then
1590 // else' case. There is an instruction before the branch.
1591 StoreInst *OtherStore = nullptr;
1592 if (OtherBr->isUnconditional()) {
1593 --BBI;
1594 // Skip over debugging info and pseudo probes.
1595 while (BBI->isDebugOrPseudoInst()) {
1596 if (BBI==OtherBB->begin())
1597 return false;
1598 --BBI;
1599 }
1600 // If this isn't a store, isn't a store to the same location, or is not the
1601 // right kind of store, bail out.
1602 OtherStore = dyn_cast<StoreInst>(BBI);
1603 if (!OtherStoreIsMergeable(OtherStore))
1604 return false;
1605 } else {
1606 // Otherwise, the other block ended with a conditional branch. If one of the
1607 // destinations is StoreBB, then we have the if/then case.
1608 if (OtherBr->getSuccessor(0) != StoreBB &&
1609 OtherBr->getSuccessor(1) != StoreBB)
1610 return false;
1611
1612 // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1613 // if/then triangle. See if there is a store to the same ptr as SI that
1614 // lives in OtherBB.
1615 for (;; --BBI) {
1616 // Check to see if we find the matching store.
1617 OtherStore = dyn_cast<StoreInst>(BBI);
1618 if (OtherStoreIsMergeable(OtherStore))
1619 break;
1620
1621 // If we find something that may be using or overwriting the stored
1622 // value, or if we run out of instructions, we can't do the transform.
1623 if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1624 BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1625 return false;
1626 }
1627
1628 // In order to eliminate the store in OtherBr, we have to make sure nothing
1629 // reads or overwrites the stored value in StoreBB.
1630 for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1631 // FIXME: This should really be AA driven.
1632 if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1633 return false;
1634 }
1635 }
1636
1637 // Insert a PHI node now if we need it.
1638 Value *MergedVal = OtherStore->getValueOperand();
1639 // The debug locations of the original instructions might differ. Merge them.
1640 DebugLoc MergedLoc =
1641 DebugLoc::getMergedLocation(SI.getDebugLoc(), OtherStore->getDebugLoc());
1642 if (MergedVal != SI.getValueOperand()) {
1643 PHINode *PN =
1644 PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge");
1645 PN->addIncoming(SI.getValueOperand(), SI.getParent());
1646 Builder.SetInsertPoint(OtherStore);
1647 PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()),
1648 OtherBB);
1649 MergedVal = InsertNewInstBefore(PN, DestBB->begin());
1650 PN->setDebugLoc(MergedLoc);
1651 }
1652
1653 // Advance to a place where it is safe to insert the new store and insert it.
1654 BBI = DestBB->getFirstInsertionPt();
1655 StoreInst *NewSI =
1656 new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(),
1657 SI.getOrdering(), SI.getSyncScopeID());
1658 InsertNewInstBefore(NewSI, BBI);
1659 NewSI->setDebugLoc(MergedLoc);
1660 NewSI->mergeDIAssignID({&SI, OtherStore});
1661
1662 // If the two stores had AA tags, merge them.
1663 AAMDNodes AATags = SI.getAAMetadata();
1664 if (AATags)
1665 NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));
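// AAMDNodes::merge keeps only the alias metadata that is valid for both of
// the original stores, so the combined store never claims more aliasing
// information than either input did.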
1666
1667 // Nuke the old stores.
1668 eraseInstFromFunction(SI);
1669 eraseInstFromFunction(*OtherStore);
1670 return true;
1671}
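// A minimal sketch (hypothetical example, not from this file) of source whose
// IR exhibits the diamond that mergeStoreIntoSuccessor targets:
//
//   void setFlag(bool Cond, int *P) {
//     if (Cond)
//       *P = 1;
//     else
//       *P = 2;
//   }
//
// Assuming both predecessors store to the same pointer with compatible value
// types and matching store attributes, the two stores are replaced by a phi
// plus a single store in the join block, along the lines of:
//
//   %storemerge = phi i32 [ 1, %if.then ], [ 2, %if.else ]
//   store i32 %storemerge, ptr %P, align 4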