LLVM 23.0.0git
InstCombineLoadStoreAlloca.cpp
Go to the documentation of this file.
1//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for load, store and alloca.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/MapVector.h"
16#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/Loads.h"
19#include "llvm/IR/DataLayout.h"
21#include "llvm/IR/LLVMContext.h"
25using namespace llvm;
26using namespace PatternMatch;
27
28#define DEBUG_TYPE "instcombine"
29
30namespace llvm {
32}
33
34STATISTIC(NumDeadStore, "Number of dead stores eliminated");
35STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
36
38 "instcombine-max-copied-from-constant-users", cl::init(300),
39 cl::desc("Maximum users to visit in copy from constant transform"),
41
42/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
43/// pointer to an alloca. Ignore any reads of the pointer, return false if we
44/// see any stores or other unknown uses. If we see pointer arithmetic, keep
45/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
46/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
47/// the alloca, and if the source pointer is a pointer to a constant memory
48/// location, we can optimize this.
49static bool
51 MemTransferInst *&TheCopy,
53 // We track lifetime intrinsics as we encounter them. If we decide to go
54 // ahead and replace the value with the memory location, this lets the caller
55 // quickly eliminate the markers.
56
57 using ValueAndIsOffset = PointerIntPair<Value *, 1, bool>;
60 Worklist.emplace_back(V, false);
61 while (!Worklist.empty()) {
62 ValueAndIsOffset Elem = Worklist.pop_back_val();
63 if (!Visited.insert(Elem).second)
64 continue;
65 if (Visited.size() > MaxCopiedFromConstantUsers)
66 return false;
67
68 const auto [Value, IsOffset] = Elem;
69 for (auto &U : Value->uses()) {
70 auto *I = cast<Instruction>(U.getUser());
71
72 if (auto *LI = dyn_cast<LoadInst>(I)) {
73 // Ignore non-volatile loads, they are always ok.
74 if (!LI->isSimple()) return false;
75 continue;
76 }
77
79 // We set IsOffset=true, to forbid the memcpy from occurring after the
80 // phi: If one of the phi operands is not based on the alloca, we
81 // would incorrectly omit a write.
82 Worklist.emplace_back(I, true);
83 continue;
84 }
86 // If uses of the bitcast are ok, we are ok.
87 Worklist.emplace_back(I, IsOffset);
88 continue;
89 }
90 if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
91 // If the GEP has all zero indices, it doesn't offset the pointer. If it
92 // doesn't, it does.
93 Worklist.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
94 continue;
95 }
96
97 if (auto *Call = dyn_cast<CallBase>(I)) {
98 // If this is the function being called then we treat it like a load and
99 // ignore it.
100 if (Call->isCallee(&U))
101 continue;
102
103 unsigned DataOpNo = Call->getDataOperandNo(&U);
104 bool IsArgOperand = Call->isArgOperand(&U);
105
106 // Inalloca arguments are clobbered by the call.
107 if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
108 return false;
109
110 // If this call site doesn't modify the memory, then we know it is just
111 // a load (but one that potentially returns the value itself), so we can
112 // ignore it if we know that the value isn't captured.
113 bool NoCapture = Call->doesNotCapture(DataOpNo);
114 if (NoCapture &&
115 (Call->onlyReadsMemory() || Call->onlyReadsMemory(DataOpNo)))
116 continue;
117 }
118
119 // Lifetime intrinsics can be handled by the caller.
120 if (I->isLifetimeStartOrEnd()) {
121 assert(I->use_empty() && "Lifetime markers have no result to use!");
122 ToDelete.push_back(I);
123 continue;
124 }
125
126 // If this is isn't our memcpy/memmove, reject it as something we can't
127 // handle.
129 if (!MI)
130 return false;
131
132 // If the transfer is volatile, reject it.
133 if (MI->isVolatile())
134 return false;
135
136 // If the transfer is using the alloca as a source of the transfer, then
137 // ignore it since it is a load (unless the transfer is volatile).
138 if (U.getOperandNo() == 1)
139 continue;
140
141 // If we already have seen a copy, reject the second one.
142 if (TheCopy) return false;
143
144 // If the pointer has been offset from the start of the alloca, we can't
145 // safely handle this.
146 if (IsOffset) return false;
147
148 // If the memintrinsic isn't using the alloca as the dest, reject it.
149 if (U.getOperandNo() != 0) return false;
150
151 // If the source of the memcpy/move is not constant, reject it.
152 if (isModSet(AA->getModRefInfoMask(MI->getSource())))
153 return false;
154
155 // Otherwise, the transform is safe. Remember the copy instruction.
156 TheCopy = MI;
157 }
158 }
159 return true;
160}
161
162/// isOnlyCopiedFromConstantMemory - Return true if the specified alloca is only
163/// modified by a copy from a constant memory location. If we can prove this, we
164/// can replace any uses of the alloca with uses of the memory location
165/// directly.
166static MemTransferInst *
168 AllocaInst *AI,
170 MemTransferInst *TheCopy = nullptr;
171 if (isOnlyCopiedFromConstantMemory(AA, AI, TheCopy, ToDelete))
172 return TheCopy;
173 return nullptr;
174}
175
176/// Returns true if V is dereferenceable for size of alloca.
177static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
178 const DataLayout &DL) {
179 if (AI->isArrayAllocation())
180 return false;
181 uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
182 if (!AllocaSize)
183 return false;
185 APInt(64, AllocaSize), DL);
186}
187
189 AllocaInst &AI, DominatorTree &DT) {
190 // Check for array size of 1 (scalar allocation).
191 if (!AI.isArrayAllocation()) {
192 // i32 1 is the canonical array size for scalar allocations.
193 if (AI.getArraySize()->getType()->isIntegerTy(32))
194 return nullptr;
195
196 // Canonicalize it.
197 return IC.replaceOperand(AI, 0, IC.Builder.getInt32(1));
198 }
199
200 // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
201 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
202 if (C->getValue().getActiveBits() <= 64) {
203 Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
204 AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
205 nullptr, AI.getName());
206 New->setAlignment(AI.getAlign());
207 New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
208
209 replaceAllDbgUsesWith(AI, *New, *New, DT);
210 return IC.replaceInstUsesWith(AI, New);
211 }
212 }
213
216
217 // Ensure that the alloca array size argument has type equal to the offset
218 // size of the alloca() pointer, which, in the tyical case, is intptr_t,
219 // so that any casting is exposed early.
220 Type *PtrIdxTy = IC.getDataLayout().getIndexType(AI.getType());
221 if (AI.getArraySize()->getType() != PtrIdxTy) {
222 Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), PtrIdxTy, false);
223 return IC.replaceOperand(AI, 0, V);
224 }
225
226 return nullptr;
227}
228
229namespace {
230// If I and V are pointers in different address space, it is not allowed to
231// use replaceAllUsesWith since I and V have different types. A
232// non-target-specific transformation should not use addrspacecast on V since
233// the two address space may be disjoint depending on target.
234//
235// This class chases down uses of the old pointer until reaching the load
236// instructions, then replaces the old pointer in the load instructions with
237// the new pointer. If during the chasing it sees bitcast or GEP, it will
238// create new bitcast or GEP with the new pointer and use them in the load
239// instruction.
240class PointerReplacer {
241public:
242 PointerReplacer(InstCombinerImpl &IC, Instruction &Root, unsigned SrcAS)
243 : IC(IC), Root(Root), FromAS(SrcAS) {}
244
245 bool collectUsers();
246 void replacePointer(Value *V);
247
248private:
249 void replace(Instruction *I);
250 Value *getReplacement(Value *V) const { return WorkMap.lookup(V); }
251 bool isAvailable(Instruction *I) const {
252 return I == &Root || UsersToReplace.contains(I);
253 }
254
255 bool isEqualOrValidAddrSpaceCast(const Instruction *I,
256 unsigned FromAS) const {
257 const auto *ASC = dyn_cast<AddrSpaceCastInst>(I);
258 if (!ASC)
259 return false;
260 unsigned ToAS = ASC->getDestAddressSpace();
261 return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
262 }
263
264 SmallSetVector<Instruction *, 32> UsersToReplace;
265 MapVector<Value *, Value *> WorkMap;
266 InstCombinerImpl &IC;
267 Instruction &Root;
268 unsigned FromAS;
269};
270} // end anonymous namespace
271
272bool PointerReplacer::collectUsers() {
274 SmallSetVector<Instruction *, 32> ValuesToRevisit;
275
276 auto PushUsersToWorklist = [&](Instruction *Inst) {
277 for (auto *U : Inst->users())
278 if (auto *I = dyn_cast<Instruction>(U))
279 if (!isAvailable(I) && !ValuesToRevisit.contains(I))
280 Worklist.emplace_back(I);
281 };
282
283 auto TryPushInstOperand = [&](Instruction *InstOp) {
284 if (!UsersToReplace.contains(InstOp)) {
285 if (!ValuesToRevisit.insert(InstOp))
286 return false;
287 Worklist.emplace_back(InstOp);
288 }
289 return true;
290 };
291
292 PushUsersToWorklist(&Root);
293 while (!Worklist.empty()) {
294 Instruction *Inst = Worklist.pop_back_val();
295 if (auto *Load = dyn_cast<LoadInst>(Inst)) {
296 if (Load->isVolatile())
297 return false;
298 UsersToReplace.insert(Load);
299 } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
300 /// TODO: Handle poison and null pointers for PHI and select.
301 // If all incoming values are available, mark this PHI as
302 // replacable and push it's users into the worklist.
303 bool IsReplaceable = all_of(PHI->incoming_values(),
304 [](Value *V) { return isa<Instruction>(V); });
305 if (IsReplaceable && all_of(PHI->incoming_values(), [&](Value *V) {
306 return isAvailable(cast<Instruction>(V));
307 })) {
308 UsersToReplace.insert(PHI);
309 PushUsersToWorklist(PHI);
310 continue;
311 }
312
313 // Either an incoming value is not an instruction or not all
314 // incoming values are available. If this PHI was already
315 // visited prior to this iteration, return false.
316 if (!IsReplaceable || !ValuesToRevisit.insert(PHI))
317 return false;
318
319 // Push PHI back into the stack, followed by unavailable
320 // incoming values.
321 Worklist.emplace_back(PHI);
322 for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx) {
323 if (!TryPushInstOperand(cast<Instruction>(PHI->getIncomingValue(Idx))))
324 return false;
325 }
326 } else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
327 auto *TrueInst = dyn_cast<Instruction>(SI->getTrueValue());
328 auto *FalseInst = dyn_cast<Instruction>(SI->getFalseValue());
329 if (!TrueInst || !FalseInst)
330 return false;
331
332 if (isAvailable(TrueInst) && isAvailable(FalseInst)) {
333 UsersToReplace.insert(SI);
334 PushUsersToWorklist(SI);
335 continue;
336 }
337
338 // Push select back onto the stack, followed by unavailable true/false
339 // value.
340 Worklist.emplace_back(SI);
341 if (!TryPushInstOperand(TrueInst) || !TryPushInstOperand(FalseInst))
342 return false;
343 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
344 auto *PtrOp = dyn_cast<Instruction>(GEP->getPointerOperand());
345 if (!PtrOp)
346 return false;
347 if (isAvailable(PtrOp)) {
348 UsersToReplace.insert(GEP);
349 PushUsersToWorklist(GEP);
350 continue;
351 }
352
353 Worklist.emplace_back(GEP);
354 if (!TryPushInstOperand(PtrOp))
355 return false;
356 } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
357 if (MI->isVolatile())
358 return false;
359 UsersToReplace.insert(Inst);
360 } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
361 UsersToReplace.insert(Inst);
362 PushUsersToWorklist(Inst);
363 } else if (Inst->isLifetimeStartOrEnd()) {
364 continue;
365 } else {
366 // TODO: For arbitrary uses with address space mismatches, should we check
367 // if we can introduce a valid addrspacecast?
368 LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *Inst << '\n');
369 return false;
370 }
371 }
372
373 return true;
374}
375
376void PointerReplacer::replacePointer(Value *V) {
377 assert(cast<PointerType>(Root.getType()) != cast<PointerType>(V->getType()) &&
378 "Invalid usage");
379 WorkMap[&Root] = V;
381 SetVector<Instruction *> PostOrderWorklist;
382 SmallPtrSet<Instruction *, 32> Visited;
383
384 // Perform a postorder traversal of the users of Root.
385 Worklist.push_back(&Root);
386 while (!Worklist.empty()) {
387 Instruction *I = Worklist.back();
388
389 // If I has not been processed before, push each of its
390 // replacable users into the worklist.
391 if (Visited.insert(I).second) {
392 for (auto *U : I->users()) {
393 auto *UserInst = cast<Instruction>(U);
394 if (UsersToReplace.contains(UserInst) && !Visited.contains(UserInst))
395 Worklist.push_back(UserInst);
396 }
397 // Otherwise, users of I have already been pushed into
398 // the PostOrderWorklist. Push I as well.
399 } else {
400 PostOrderWorklist.insert(I);
401 Worklist.pop_back();
402 }
403 }
404
405 // Replace pointers in reverse-postorder.
406 for (Instruction *I : reverse(PostOrderWorklist))
407 replace(I);
408}
409
410void PointerReplacer::replace(Instruction *I) {
411 if (getReplacement(I))
412 return;
413
414 if (auto *LT = dyn_cast<LoadInst>(I)) {
415 auto *V = getReplacement(LT->getPointerOperand());
416 assert(V && "Operand not replaced");
417 auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
418 LT->getAlign(), LT->getOrdering(),
419 LT->getSyncScopeID());
420 NewI->takeName(LT);
421 copyMetadataForLoad(*NewI, *LT);
422
423 IC.InsertNewInstWith(NewI, LT->getIterator());
424 IC.replaceInstUsesWith(*LT, NewI);
425 // LT has actually been replaced by NewI. It is useless to insert LT into
426 // the map. Instead, we insert NewI into the map to indicate this is the
427 // replacement (new value).
428 WorkMap[NewI] = NewI;
429 } else if (auto *PHI = dyn_cast<PHINode>(I)) {
430 // Create a new PHI by replacing any incoming value that is a user of the
431 // root pointer and has a replacement.
432 Value *V = WorkMap.lookup(PHI->getIncomingValue(0));
433 PHI->mutateType(V ? V->getType() : PHI->getIncomingValue(0)->getType());
434 for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I) {
435 Value *V = WorkMap.lookup(PHI->getIncomingValue(I));
436 PHI->setIncomingValue(I, V ? V : PHI->getIncomingValue(I));
437 }
438 WorkMap[PHI] = PHI;
439 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
440 auto *V = getReplacement(GEP->getPointerOperand());
441 assert(V && "Operand not replaced");
442 SmallVector<Value *, 8> Indices(GEP->indices());
443 auto *NewI =
444 GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
445 IC.InsertNewInstWith(NewI, GEP->getIterator());
446 NewI->takeName(GEP);
447 NewI->setNoWrapFlags(GEP->getNoWrapFlags());
448 WorkMap[GEP] = NewI;
449 } else if (auto *SI = dyn_cast<SelectInst>(I)) {
450 Value *TrueValue = SI->getTrueValue();
451 Value *FalseValue = SI->getFalseValue();
452 if (Value *Replacement = getReplacement(TrueValue))
453 TrueValue = Replacement;
454 if (Value *Replacement = getReplacement(FalseValue))
455 FalseValue = Replacement;
456 auto *NewSI = SelectInst::Create(SI->getCondition(), TrueValue, FalseValue,
457 SI->getName(), nullptr, SI);
458 IC.InsertNewInstWith(NewSI, SI->getIterator());
459 NewSI->takeName(SI);
460 WorkMap[SI] = NewSI;
461 } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
462 auto *DestV = MemCpy->getRawDest();
463 auto *SrcV = MemCpy->getRawSource();
464
465 if (auto *DestReplace = getReplacement(DestV))
466 DestV = DestReplace;
467 if (auto *SrcReplace = getReplacement(SrcV))
468 SrcV = SrcReplace;
469
470 IC.Builder.SetInsertPoint(MemCpy);
471 auto *NewI = IC.Builder.CreateMemTransferInst(
472 MemCpy->getIntrinsicID(), DestV, MemCpy->getDestAlign(), SrcV,
473 MemCpy->getSourceAlign(), MemCpy->getLength(), MemCpy->isVolatile());
474 AAMDNodes AAMD = MemCpy->getAAMetadata();
475 if (AAMD)
476 NewI->setAAMetadata(AAMD);
477
478 IC.eraseInstFromFunction(*MemCpy);
479 WorkMap[MemCpy] = NewI;
480 } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(I)) {
481 auto *V = getReplacement(ASC->getPointerOperand());
482 assert(V && "Operand not replaced");
483 assert(isEqualOrValidAddrSpaceCast(
484 ASC, V->getType()->getPointerAddressSpace()) &&
485 "Invalid address space cast!");
486
487 if (V->getType()->getPointerAddressSpace() !=
488 ASC->getType()->getPointerAddressSpace()) {
489 auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), "");
490 NewI->takeName(ASC);
491 IC.InsertNewInstWith(NewI, ASC->getIterator());
492 WorkMap[ASC] = NewI;
493 } else {
494 WorkMap[ASC] = V;
495 }
496
497 } else {
498 llvm_unreachable("should never reach here");
499 }
500}
501
503 if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
504 return I;
505
506 if (AI.getAllocatedType()->isSized()) {
507 // Move all alloca's of zero byte objects to the entry block and merge them
508 // together. Note that we only do this for alloca's, because malloc should
509 // allocate and return a unique pointer, even for a zero byte allocation.
510 if (DL.getTypeAllocSize(AI.getAllocatedType()).getKnownMinValue() == 0) {
511 // For a zero sized alloca there is no point in doing an array allocation.
512 // This is helpful if the array size is a complicated expression not used
513 // elsewhere.
514 if (AI.isArrayAllocation())
515 return replaceOperand(AI, 0,
516 ConstantInt::get(AI.getArraySize()->getType(), 1));
517
518 // Get the first instruction in the entry block.
519 BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
520 BasicBlock::iterator FirstInst = EntryBlock.getFirstNonPHIOrDbg();
521 if (&*FirstInst != &AI) {
522 // If the entry block doesn't start with a zero-size alloca then move
523 // this one to the start of the entry block. There is no problem with
524 // dominance as the array size was forced to a constant earlier already.
525 AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
526 if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
527 DL.getTypeAllocSize(EntryAI->getAllocatedType())
528 .getKnownMinValue() != 0) {
529 AI.moveBefore(FirstInst);
530 return &AI;
531 }
532
533 // Replace this zero-sized alloca with the one at the start of the entry
534 // block after ensuring that the address will be aligned enough for both
535 // types.
536 const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
537 EntryAI->setAlignment(MaxAlign);
538 return replaceInstUsesWith(AI, EntryAI);
539 }
540 }
541 }
542
543 // Check to see if this allocation is only modified by a memcpy/memmove from
544 // a memory location whose alignment is equal to or exceeds that of the
545 // allocation. If this is the case, we can change all users to use the
546 // constant memory location instead. This is commonly produced by the CFE by
547 // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
548 // is only subsequently read.
550 if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
551 Value *TheSrc = Copy->getSource();
552 Align AllocaAlign = AI.getAlign();
553 Align SourceAlign = getOrEnforceKnownAlignment(
554 TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
555 if (AllocaAlign <= SourceAlign &&
556 isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
557 !isa<Instruction>(TheSrc)) {
558 // FIXME: Can we sink instructions without violating dominance when TheSrc
559 // is an instruction instead of a constant or argument?
560 LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
561 LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
562 unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
563 if (AI.getAddressSpace() == SrcAddrSpace) {
564 for (Instruction *Delete : ToDelete)
565 eraseInstFromFunction(*Delete);
566
567 Instruction *NewI = replaceInstUsesWith(AI, TheSrc);
569 ++NumGlobalCopies;
570 return NewI;
571 }
572
573 PointerReplacer PtrReplacer(*this, AI, SrcAddrSpace);
574 if (PtrReplacer.collectUsers()) {
575 for (Instruction *Delete : ToDelete)
576 eraseInstFromFunction(*Delete);
577
578 PtrReplacer.replacePointer(TheSrc);
579 ++NumGlobalCopies;
580 }
581 }
582 }
583
584 // At last, use the generic allocation site handler to aggressively remove
585 // unused allocas.
586 return visitAllocSite(AI);
587}
588
589// Are we allowed to form a atomic load or store of this type?
590static bool isSupportedAtomicType(Type *Ty) {
591 return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
592}
593
594/// Helper to combine a load to a new type.
595///
596/// This just does the work of combining a load to a new type. It handles
597/// metadata, etc., and returns the new instruction. The \c NewTy should be the
598/// loaded *value* type. This will convert it to a pointer, cast the operand to
599/// that pointer type, load it, etc.
600///
601/// Note that this will create all of the instructions with whatever insert
602/// point the \c InstCombinerImpl currently is using.
604 const Twine &Suffix) {
605 assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
606 "can't fold an atomic load to requested type");
607
608 LoadInst *NewLoad =
609 Builder.CreateAlignedLoad(NewTy, LI.getPointerOperand(), LI.getAlign(),
610 LI.isVolatile(), LI.getName() + Suffix);
611 NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
612 copyMetadataForLoad(*NewLoad, LI);
613 return NewLoad;
614}
615
616/// Combine a store to a new type.
617///
618/// Returns the newly created store instruction.
620 Value *V) {
621 assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
622 "can't fold an atomic store of requested type");
623
624 Value *Ptr = SI.getPointerOperand();
626 SI.getAllMetadata(MD);
627
628 StoreInst *NewStore =
629 IC.Builder.CreateAlignedStore(V, Ptr, SI.getAlign(), SI.isVolatile());
630 NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
631 for (const auto &MDPair : MD) {
632 unsigned ID = MDPair.first;
633 MDNode *N = MDPair.second;
634 // Note, essentially every kind of metadata should be preserved here! This
635 // routine is supposed to clone a store instruction changing *only its
636 // type*. The only metadata it makes sense to drop is metadata which is
637 // invalidated when the pointer type changes. This should essentially
638 // never be the case in LLVM, but we explicitly switch over only known
639 // metadata to be conservatively correct. If you are adding metadata to
640 // LLVM which pertains to stores, you almost certainly want to add it
641 // here.
642 switch (ID) {
643 case LLVMContext::MD_dbg:
644 case LLVMContext::MD_DIAssignID:
645 case LLVMContext::MD_tbaa:
646 case LLVMContext::MD_prof:
647 case LLVMContext::MD_fpmath:
648 case LLVMContext::MD_tbaa_struct:
649 case LLVMContext::MD_alias_scope:
650 case LLVMContext::MD_noalias:
651 case LLVMContext::MD_nontemporal:
652 case LLVMContext::MD_mem_parallel_loop_access:
653 case LLVMContext::MD_access_group:
654 // All of these directly apply.
655 NewStore->setMetadata(ID, N);
656 break;
657 case LLVMContext::MD_invariant_load:
658 case LLVMContext::MD_nonnull:
659 case LLVMContext::MD_noundef:
660 case LLVMContext::MD_range:
661 case LLVMContext::MD_align:
662 case LLVMContext::MD_dereferenceable:
663 case LLVMContext::MD_dereferenceable_or_null:
664 // These don't apply for stores.
665 break;
666 }
667 }
668
669 return NewStore;
670}
671
672/// Combine loads to match the type of their uses' value after looking
673/// through intervening bitcasts.
674///
675/// The core idea here is that if the result of a load is used in an operation,
676/// we should load the type most conducive to that operation. For example, when
677/// loading an integer and converting that immediately to a pointer, we should
678/// instead directly load a pointer.
679///
680/// However, this routine must never change the width of a load or the number of
681/// loads as that would introduce a semantic change. This combine is expected to
682/// be a semantic no-op which just allows loads to more closely model the types
683/// of their consuming operations.
684///
685/// Currently, we also refuse to change the precise type used for an atomic load
686/// or a volatile load. This is debatable, and might be reasonable to change
687/// later. However, it is risky in case some backend or other part of LLVM is
688/// relying on the exact type loaded to select appropriate atomic operations.
690 LoadInst &Load) {
691 // FIXME: We could probably with some care handle both volatile and ordered
692 // atomic loads here but it isn't clear that this is important.
693 if (!Load.isUnordered())
694 return nullptr;
695
696 if (Load.use_empty())
697 return nullptr;
698
699 // swifterror values can't be bitcasted.
700 if (Load.getPointerOperand()->isSwiftError())
701 return nullptr;
702
703 // Fold away bit casts of the loaded value by loading the desired type.
704 // Note that we should not do this for pointer<->integer casts,
705 // because that would result in type punning.
706 if (Load.hasOneUse()) {
707 // Don't transform when the type is x86_amx, it makes the pass that lower
708 // x86_amx type happy.
709 Type *LoadTy = Load.getType();
710 if (auto *BC = dyn_cast<BitCastInst>(Load.user_back())) {
711 assert(!LoadTy->isX86_AMXTy() && "Load from x86_amx* should not happen!");
712 if (BC->getType()->isX86_AMXTy())
713 return nullptr;
714 }
715
716 if (auto *CastUser = dyn_cast<CastInst>(Load.user_back())) {
717 Type *DestTy = CastUser->getDestTy();
718 if (CastUser->isNoopCast(IC.getDataLayout()) &&
719 LoadTy->isPtrOrPtrVectorTy() == DestTy->isPtrOrPtrVectorTy() &&
720 (!Load.isAtomic() || isSupportedAtomicType(DestTy))) {
721 LoadInst *NewLoad = IC.combineLoadToNewType(Load, DestTy);
722 CastUser->replaceAllUsesWith(NewLoad);
723 IC.eraseInstFromFunction(*CastUser);
724 return &Load;
725 }
726 }
727 }
728
729 // FIXME: We should also canonicalize loads of vectors when their elements are
730 // cast to other types.
731 return nullptr;
732}
733
735 // FIXME: We could probably with some care handle both volatile and atomic
736 // stores here but it isn't clear that this is important.
737 if (!LI.isSimple())
738 return nullptr;
739
740 Type *T = LI.getType();
741 if (!T->isAggregateType())
742 return nullptr;
743
744 StringRef Name = LI.getName();
745
746 if (auto *ST = dyn_cast<StructType>(T)) {
747 // If the struct only have one element, we unpack.
748 auto NumElements = ST->getNumElements();
749 if (NumElements == 1) {
750 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
751 ".unpack");
752 NewLoad->setAAMetadata(LI.getAAMetadata());
753 // Copy invariant metadata from parent load.
754 NewLoad->copyMetadata(LI, LLVMContext::MD_invariant_load);
756 PoisonValue::get(T), NewLoad, 0, Name));
757 }
758
759 // We don't want to break loads with padding here as we'd loose
760 // the knowledge that padding exists for the rest of the pipeline.
761 const DataLayout &DL = IC.getDataLayout();
762 auto *SL = DL.getStructLayout(ST);
763
764 if (SL->hasPadding())
765 return nullptr;
766
767 const auto Align = LI.getAlign();
768 auto *Addr = LI.getPointerOperand();
769 auto *IdxType = DL.getIndexType(Addr->getType());
770
772 for (unsigned i = 0; i < NumElements; i++) {
773 auto *Ptr = IC.Builder.CreateInBoundsPtrAdd(
774 Addr, IC.Builder.CreateTypeSize(IdxType, SL->getElementOffset(i)),
775 Name + ".elt");
776 auto *L = IC.Builder.CreateAlignedLoad(
777 ST->getElementType(i), Ptr,
778 commonAlignment(Align, SL->getElementOffset(i).getKnownMinValue()),
779 Name + ".unpack");
780 // Propagate AA metadata. It'll still be valid on the narrowed load.
781 L->setAAMetadata(LI.getAAMetadata());
782 // Copy invariant metadata from parent load.
783 L->copyMetadata(LI, LLVMContext::MD_invariant_load);
784 V = IC.Builder.CreateInsertValue(V, L, i);
785 }
786
787 V->setName(Name);
788 return IC.replaceInstUsesWith(LI, V);
789 }
790
791 if (auto *AT = dyn_cast<ArrayType>(T)) {
792 auto *ET = AT->getElementType();
793 auto NumElements = AT->getNumElements();
794 if (NumElements == 1) {
795 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
796 NewLoad->setAAMetadata(LI.getAAMetadata());
798 PoisonValue::get(T), NewLoad, 0, Name));
799 }
800
801 // Bail out if the array is too large. Ideally we would like to optimize
802 // arrays of arbitrary size but this has a terrible impact on compile time.
803 // The threshold here is chosen arbitrarily, maybe needs a little bit of
804 // tuning.
805 if (NumElements > IC.MaxArraySizeForCombine)
806 return nullptr;
807
808 const DataLayout &DL = IC.getDataLayout();
809 TypeSize EltSize = DL.getTypeAllocSize(ET);
810 const auto Align = LI.getAlign();
811
812 auto *Addr = LI.getPointerOperand();
813 auto *IdxType = Type::getInt64Ty(T->getContext());
814 auto *Zero = ConstantInt::get(IdxType, 0);
815
818 for (uint64_t i = 0; i < NumElements; i++) {
819 Value *Indices[2] = {
820 Zero,
821 ConstantInt::get(IdxType, i),
822 };
823 auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
824 Name + ".elt");
825 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
826 auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
827 EltAlign, Name + ".unpack");
828 L->setAAMetadata(LI.getAAMetadata());
829 V = IC.Builder.CreateInsertValue(V, L, i);
830 Offset += EltSize;
831 }
832
833 V->setName(Name);
834 return IC.replaceInstUsesWith(LI, V);
835 }
836
837 return nullptr;
838}
839
840// If we can determine that all possible objects pointed to by the provided
841// pointer value are, not only dereferenceable, but also definitively less than
842// or equal to the provided maximum size, then return true. Otherwise, return
843// false (constant global values and allocas fall into this category).
844//
845// FIXME: This should probably live in ValueTracking (or similar).
847 const DataLayout &DL) {
849 SmallVector<Value *, 4> Worklist(1, V);
850
851 do {
852 Value *P = Worklist.pop_back_val();
853 P = P->stripPointerCasts();
854
855 if (!Visited.insert(P).second)
856 continue;
857
859 Worklist.push_back(SI->getTrueValue());
860 Worklist.push_back(SI->getFalseValue());
861 continue;
862 }
863
864 if (PHINode *PN = dyn_cast<PHINode>(P)) {
865 append_range(Worklist, PN->incoming_values());
866 continue;
867 }
868
870 if (GA->isInterposable())
871 return false;
872 Worklist.push_back(GA->getAliasee());
873 continue;
874 }
875
876 // If we know how big this object is, and it is less than MaxSize, continue
877 // searching. Otherwise, return false.
878 if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
879 std::optional<TypeSize> AllocSize = AI->getAllocationSize(DL);
880 if (!AllocSize || AllocSize->isScalable() ||
881 AllocSize->getFixedValue() > MaxSize)
882 return false;
883 continue;
884 }
885
887 if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
888 return false;
889
890 uint64_t InitSize = GV->getGlobalSize(DL);
891 if (InitSize > MaxSize)
892 return false;
893 continue;
894 }
895
896 return false;
897 } while (!Worklist.empty());
898
899 return true;
900}
901
902// If we're indexing into an object of a known size, and the outer index is
903// not a constant, but having any value but zero would lead to undefined
904// behavior, replace it with zero.
905//
906// For example, if we have:
907// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
908// ...
909// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
910// ... = load i32* %arrayidx, align 4
911// Then we know that we can replace %x in the GEP with i64 0.
912//
913// FIXME: We could fold any GEP index to zero that would cause UB if it were
914// not zero. Currently, we only handle the first such index. Also, we could
915// also search through non-zero constant indices if we kept track of the
916// offsets those indices implied.
918 GetElementPtrInst *GEPI, Instruction *MemI,
919 unsigned &Idx) {
920 if (GEPI->getNumOperands() < 2)
921 return false;
922
923 // Find the first non-zero index of a GEP. If all indices are zero, return
924 // one past the last index.
925 auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
926 unsigned I = 1;
927 for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
928 Value *V = GEPI->getOperand(I);
929 if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
930 if (CI->isZero())
931 continue;
932
933 break;
934 }
935
936 return I;
937 };
938
939 // Skip through initial 'zero' indices, and find the corresponding pointer
940 // type. See if the next index is not a constant.
941 Idx = FirstNZIdx(GEPI);
942 if (Idx == GEPI->getNumOperands())
943 return false;
944 if (isa<Constant>(GEPI->getOperand(Idx)))
945 return false;
946
947 SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
948 Type *SourceElementType = GEPI->getSourceElementType();
949 // Size information about scalable vectors is not available, so we cannot
950 // deduce whether indexing at n is undefined behaviour or not. Bail out.
951 if (SourceElementType->isScalableTy())
952 return false;
953
954 Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
955 if (!AllocTy || !AllocTy->isSized())
956 return false;
957 const DataLayout &DL = IC.getDataLayout();
958 uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedValue();
959
960 // If there are more indices after the one we might replace with a zero, make
961 // sure they're all non-negative. If any of them are negative, the overall
962 // address being computed might be before the base address determined by the
963 // first non-zero index.
964 auto IsAllNonNegative = [&]() {
965 for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
966 KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), MemI);
967 if (Known.isNonNegative())
968 continue;
969 return false;
970 }
971
972 return true;
973 };
974
975 // FIXME: If the GEP is not inbounds, and there are extra indices after the
976 // one we'll replace, those could cause the address computation to wrap
977 // (rendering the IsAllNonNegative() check below insufficient). We can do
978 // better, ignoring zero indices (and other indices we can prove small
979 // enough not to wrap).
980 if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
981 return false;
982
983 // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
984 // also known to be dereferenceable.
985 return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
986 IsAllNonNegative();
987}
988
989// If we're indexing into an object with a variable index for the memory
990// access, but the object has only one element, we can assume that the index
991// will always be zero. If we replace the GEP, return it.
993 Instruction &MemI) {
995 unsigned Idx;
996 if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
997 Instruction *NewGEPI = GEPI->clone();
998 NewGEPI->setOperand(Idx,
999 ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
1000 IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator());
1001 return NewGEPI;
1002 }
1003 }
1004
1005 return nullptr;
1006}
1007
1009 if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
1010 return false;
1011
1012 auto *Ptr = SI.getPointerOperand();
1014 Ptr = GEPI->getOperand(0);
1015 return (isa<ConstantPointerNull>(Ptr) &&
1016 !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
1017}
1018
1021 const Value *GEPI0 = GEPI->getOperand(0);
1022 if (isa<ConstantPointerNull>(GEPI0) &&
1023 !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
1024 return true;
1025 }
1026 if (isa<UndefValue>(Op) ||
1029 return true;
1030 return false;
1031}
1032
1033Value *InstCombinerImpl::simplifyNonNullOperand(Value *V,
1034 bool HasDereferenceable,
1035 unsigned Depth) {
1036 if (auto *Sel = dyn_cast<SelectInst>(V)) {
1037 if (isa<ConstantPointerNull>(Sel->getOperand(1)))
1038 return Sel->getOperand(2);
1039
1040 if (isa<ConstantPointerNull>(Sel->getOperand(2)))
1041 return Sel->getOperand(1);
1042 }
1043
1044 if (!V->hasOneUse())
1045 return nullptr;
1046
1047 constexpr unsigned RecursionLimit = 3;
1048 if (Depth == RecursionLimit)
1049 return nullptr;
1050
1051 if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
1052 if (HasDereferenceable || GEP->isInBounds()) {
1053 if (auto *Res = simplifyNonNullOperand(GEP->getPointerOperand(),
1054 HasDereferenceable, Depth + 1)) {
1055 replaceOperand(*GEP, 0, Res);
1057 return nullptr;
1058 }
1059 }
1060 }
1061
1062 if (auto *PHI = dyn_cast<PHINode>(V)) {
1063 bool Changed = false;
1064 for (Use &U : PHI->incoming_values()) {
1065 // We set Depth to RecursionLimit to avoid expensive recursion.
1066 if (auto *Res = simplifyNonNullOperand(U.get(), HasDereferenceable,
1067 RecursionLimit)) {
1068 replaceUse(U, Res);
1069 Changed = true;
1070 }
1071 }
1072 if (Changed)
1074 return nullptr;
1075 }
1076
1077 return nullptr;
1078}
1079
1081 Value *Op = LI.getOperand(0);
1082 if (Value *Res = simplifyLoadInst(&LI, Op, SQ.getWithInstruction(&LI)))
1083 return replaceInstUsesWith(LI, Res);
1084
1085 // Try to canonicalize the loaded type.
1086 if (Instruction *Res = combineLoadToOperationType(*this, LI))
1087 return Res;
1088
1089 // Replace GEP indices if possible.
1090 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI))
1091 return replaceOperand(LI, 0, NewGEPI);
1092
1093 if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1094 return Res;
1095
1096 // Do really simple store-to-load forwarding and load CSE, to catch cases
1097 // where there are several consecutive memory accesses to the same location,
1098 // separated by a few arithmetic operations.
1099 bool IsLoadCSE = false;
1100 BatchAAResults BatchAA(*AA);
1101 if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
1102 if (IsLoadCSE)
1103 combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
1104
1105 return replaceInstUsesWith(
1106 LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1107 LI.getName() + ".cast"));
1108 }
1109
1110 // None of the following transforms are legal for volatile/ordered atomic
1111 // loads. Most of them do apply for unordered atomics.
1112 if (!LI.isUnordered()) return nullptr;
1113
1114 // load(gep null, ...) -> unreachable
1115 // load null/undef -> unreachable
1116 // TODO: Consider a target hook for valid address spaces for this xforms.
1117 if (canSimplifyNullLoadOrGEP(LI, Op)) {
1120 }
1121
1122 if (Op->hasOneUse()) {
1123 // Change select and PHI nodes to select values instead of addresses: this
1124 // helps alias analysis out a lot, allows many others simplifications, and
1125 // exposes redundancy in the code.
1126 //
1127 // Note that we cannot do the transformation unless we know that the
1128 // introduced loads cannot trap! Something like this is valid as long as
1129 // the condition is always false: load (select bool %C, int* null, int* %G),
1130 // but it would not be valid if we transformed it to load from null
1131 // unconditionally.
1132 //
1133
1135 Value *SelectOp = Op;
1136 if (ASC && ASC->getOperand(0)->hasOneUse())
1137 SelectOp = ASC->getOperand(0);
1138 if (SelectInst *SI = dyn_cast<SelectInst>(SelectOp)) {
1139 // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1140 // or
1141 // load (addrspacecast(select (Cond, &V1, &V2))) -->
1142 // select(Cond, load (addrspacecast(&V1)), load (addrspacecast(&V2))).
1143 Align Alignment = LI.getAlign();
1144 if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(),
1145 Alignment, DL, SI) &&
1146 isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(),
1147 Alignment, DL, SI)) {
1148
1149 auto MaybeCastedLoadOperand = [&](Value *Op) {
1150 if (ASC)
1151 return Builder.CreateAddrSpaceCast(Op, ASC->getType(),
1152 Op->getName() + ".cast");
1153 return Op;
1154 };
1155 Value *LoadOp1 = MaybeCastedLoadOperand(SI->getOperand(1));
1156 LoadInst *V1 = Builder.CreateLoad(LI.getType(), LoadOp1,
1157 LoadOp1->getName() + ".val");
1158
1159 Value *LoadOp2 = MaybeCastedLoadOperand(SI->getOperand(2));
1160 LoadInst *V2 = Builder.CreateLoad(LI.getType(), LoadOp2,
1161 LoadOp2->getName() + ".val");
1162 assert(LI.isUnordered() && "implied by above");
1163 V1->setAlignment(Alignment);
1164 V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1165 V2->setAlignment(Alignment);
1166 V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1167 // It is safe to copy any metadata that does not trigger UB. Copy any
1168 // poison-generating metadata.
1171 return SelectInst::Create(SI->getCondition(), V1, V2, "", nullptr,
1172 ProfcheckDisableMetadataFixes ? nullptr : SI);
1173 }
1174 }
1175 }
1176
1178 if (Value *V = simplifyNonNullOperand(Op, /*HasDereferenceable=*/true))
1179 return replaceOperand(LI, 0, V);
1180
1181 return nullptr;
1182}
1183
1184/// Look for extractelement/insertvalue sequence that acts like a bitcast.
1185///
1186/// \returns underlying value that was "cast", or nullptr otherwise.
1187///
1188/// For example, if we have:
1189///
1190/// %E0 = extractelement <2 x double> %U, i32 0
1191/// %V0 = insertvalue [2 x double] undef, double %E0, 0
1192/// %E1 = extractelement <2 x double> %U, i32 1
1193/// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1194///
1195/// and the layout of a <2 x double> is isomorphic to a [2 x double],
1196/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1197/// Note that %U may contain non-undef values where %V1 has undef.
1199 Value *U = nullptr;
1200 while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1201 auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1202 if (!E)
1203 return nullptr;
1204 auto *W = E->getVectorOperand();
1205 if (!U)
1206 U = W;
1207 else if (U != W)
1208 return nullptr;
1209 auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1210 if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1211 return nullptr;
1212 V = IV->getAggregateOperand();
1213 }
1214 if (!match(V, m_Undef()) || !U)
1215 return nullptr;
1216
1217 auto *UT = cast<VectorType>(U->getType());
1218 auto *VT = V->getType();
1219 // Check that types UT and VT are bitwise isomorphic.
1220 const auto &DL = IC.getDataLayout();
1221 if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1222 return nullptr;
1223 }
1224 if (auto *AT = dyn_cast<ArrayType>(VT)) {
1225 if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1226 return nullptr;
1227 } else {
1228 auto *ST = cast<StructType>(VT);
1229 if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1230 return nullptr;
1231 for (const auto *EltT : ST->elements()) {
1232 if (EltT != UT->getElementType())
1233 return nullptr;
1234 }
1235 }
1236 return U;
1237}
1238
1239/// Combine stores to match the type of value being stored.
1240///
1241/// The core idea here is that the memory does not have any intrinsic type and
1242/// where we can we should match the type of a store to the type of value being
1243/// stored.
1244///
1245/// However, this routine must never change the width of a store or the number of
1246/// stores as that would introduce a semantic change. This combine is expected to
1247/// be a semantic no-op which just allows stores to more closely model the types
1248/// of their incoming values.
1249///
1250/// Currently, we also refuse to change the precise type used for an atomic or
1251/// volatile store. This is debatable, and might be reasonable to change later.
1252/// However, it is risky in case some backend or other part of LLVM is relying
1253/// on the exact type stored to select appropriate atomic operations.
1254///
1255/// \returns true if the store was successfully combined away. This indicates
1256/// the caller must erase the store instruction. We have to let the caller erase
1257/// the store instruction as otherwise there is no way to signal whether it was
1258/// combined or not: IC.EraseInstFromFunction returns a null pointer.
1260 // FIXME: We could probably with some care handle both volatile and ordered
1261 // atomic stores here but it isn't clear that this is important.
1262 if (!SI.isUnordered())
1263 return false;
1264
1265 // swifterror values can't be bitcasted.
1266 if (SI.getPointerOperand()->isSwiftError())
1267 return false;
1268
1269 Value *V = SI.getValueOperand();
1270
1271 // Fold away bit casts of the stored value by storing the original type.
1272 if (auto *BC = dyn_cast<BitCastInst>(V)) {
1273 assert(!BC->getType()->isX86_AMXTy() &&
1274 "store to x86_amx* should not happen!");
1275 V = BC->getOperand(0);
1276 // Don't transform when the type is x86_amx, it makes the pass that lower
1277 // x86_amx type happy.
1278 if (V->getType()->isX86_AMXTy())
1279 return false;
1280 if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1281 combineStoreToNewValue(IC, SI, V);
1282 return true;
1283 }
1284 }
1285
1286 if (Value *U = likeBitCastFromVector(IC, V))
1287 if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1288 combineStoreToNewValue(IC, SI, U);
1289 return true;
1290 }
1291
1292 // FIXME: We should also canonicalize stores of vectors when their elements
1293 // are cast to other types.
1294 return false;
1295}
1296
1298 // FIXME: We could probably with some care handle both volatile and atomic
1299 // stores here but it isn't clear that this is important.
1300 if (!SI.isSimple())
1301 return false;
1302
1303 Value *V = SI.getValueOperand();
1304 Type *T = V->getType();
1305
1306 if (!T->isAggregateType())
1307 return false;
1308
1309 if (auto *ST = dyn_cast<StructType>(T)) {
1310 // If the struct only have one element, we unpack.
1311 unsigned Count = ST->getNumElements();
1312 if (Count == 1) {
1313 V = IC.Builder.CreateExtractValue(V, 0);
1314 combineStoreToNewValue(IC, SI, V);
1315 return true;
1316 }
1317
1318 // We don't want to break loads with padding here as we'd loose
1319 // the knowledge that padding exists for the rest of the pipeline.
1320 const DataLayout &DL = IC.getDataLayout();
1321 auto *SL = DL.getStructLayout(ST);
1322
1323 if (SL->hasPadding())
1324 return false;
1325
1326 const auto Align = SI.getAlign();
1327
1328 SmallString<16> EltName = V->getName();
1329 EltName += ".elt";
1330 auto *Addr = SI.getPointerOperand();
1331 SmallString<16> AddrName = Addr->getName();
1332 AddrName += ".repack";
1333
1334 auto *IdxType = DL.getIndexType(Addr->getType());
1335 for (unsigned i = 0; i < Count; i++) {
1336 auto *Ptr = IC.Builder.CreateInBoundsPtrAdd(
1337 Addr, IC.Builder.CreateTypeSize(IdxType, SL->getElementOffset(i)),
1338 AddrName);
1339 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1340 auto EltAlign =
1341 commonAlignment(Align, SL->getElementOffset(i).getKnownMinValue());
1342 llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1343 NS->setAAMetadata(SI.getAAMetadata());
1344 }
1345
1346 return true;
1347 }
1348
1349 if (auto *AT = dyn_cast<ArrayType>(T)) {
1350 // If the array only have one element, we unpack.
1351 auto NumElements = AT->getNumElements();
1352 if (NumElements == 1) {
1353 V = IC.Builder.CreateExtractValue(V, 0);
1354 combineStoreToNewValue(IC, SI, V);
1355 return true;
1356 }
1357
1358 // Bail out if the array is too large. Ideally we would like to optimize
1359 // arrays of arbitrary size but this has a terrible impact on compile time.
1360 // The threshold here is chosen arbitrarily, maybe needs a little bit of
1361 // tuning.
1362 if (NumElements > IC.MaxArraySizeForCombine)
1363 return false;
1364
1365 const DataLayout &DL = IC.getDataLayout();
1366 TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
1367 const auto Align = SI.getAlign();
1368
1369 SmallString<16> EltName = V->getName();
1370 EltName += ".elt";
1371 auto *Addr = SI.getPointerOperand();
1372 SmallString<16> AddrName = Addr->getName();
1373 AddrName += ".repack";
1374
1375 auto *IdxType = Type::getInt64Ty(T->getContext());
1376 auto *Zero = ConstantInt::get(IdxType, 0);
1377
1379 for (uint64_t i = 0; i < NumElements; i++) {
1380 Value *Indices[2] = {
1381 Zero,
1382 ConstantInt::get(IdxType, i),
1383 };
1384 auto *Ptr =
1385 IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
1386 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1387 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
1388 Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1389 NS->setAAMetadata(SI.getAAMetadata());
1390 Offset += EltSize;
1391 }
1392
1393 return true;
1394 }
1395
1396 return false;
1397}
1398
1399/// equivalentAddressValues - Test if A and B will obviously have the same
1400/// value. This includes recognizing that %t0 and %t1 will have the same
1401/// value in code like this:
1402/// %t0 = getelementptr \@a, 0, 3
1403/// store i32 0, i32* %t0
1404/// %t1 = getelementptr \@a, 0, 3
1405/// %t2 = load i32* %t1
1406///
1408 // Test if the values are trivially equivalent.
1409 if (A == B) return true;
1410
1411 // Test if the values come form identical arithmetic instructions.
1412 // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1413 // its only used to compare two uses within the same basic block, which
1414 // means that they'll always either have the same value or one of them
1415 // will have an undefined value.
1416 if (isa<BinaryOperator>(A) ||
1417 isa<CastInst>(A) ||
1418 isa<PHINode>(A) ||
1421 if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1422 return true;
1423
1424 // Otherwise they may not be equivalent.
1425 return false;
1426}
1427
1429 Value *Val = SI.getOperand(0);
1430 Value *Ptr = SI.getOperand(1);
1431
1432 // Try to canonicalize the stored type.
1433 if (combineStoreToValueType(*this, SI))
1434 return eraseInstFromFunction(SI);
1435
1436 // Try to canonicalize the stored type.
1437 if (unpackStoreToAggregate(*this, SI))
1438 return eraseInstFromFunction(SI);
1439
1440 // Replace GEP indices if possible.
1441 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI))
1442 return replaceOperand(SI, 1, NewGEPI);
1443
1444 // Don't hack volatile/ordered stores.
1445 // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1446 if (!SI.isUnordered()) return nullptr;
1447
1448 // If the RHS is an alloca with a single use, zapify the store, making the
1449 // alloca dead.
1450 if (Ptr->hasOneUse()) {
1451 if (isa<AllocaInst>(Ptr))
1452 return eraseInstFromFunction(SI);
1454 if (isa<AllocaInst>(GEP->getOperand(0))) {
1455 if (GEP->getOperand(0)->hasOneUse())
1456 return eraseInstFromFunction(SI);
1457 }
1458 }
1459 }
1460
1461 // If we have a store to a location which is known constant, we can conclude
1462 // that the store must be storing the constant value (else the memory
1463 // wouldn't be constant), and this must be a noop.
1464 if (!isModSet(AA->getModRefInfoMask(Ptr)))
1465 return eraseInstFromFunction(SI);
1466
1467 // Do really simple DSE, to catch cases where there are several consecutive
1468 // stores to the same location, separated by a few arithmetic operations. This
1469 // situation often occurs with bitfield accesses.
1471 for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1472 --ScanInsts) {
1473 --BBI;
1474 // Don't count debug info directives, lest they affect codegen,
1475 // and we skip pointer-to-pointer bitcasts, which are NOPs.
1476 if (BBI->isDebugOrPseudoInst()) {
1477 ScanInsts++;
1478 continue;
1479 }
1480
1481 if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1482 // Prev store isn't volatile, and stores to the same location?
1483 if (PrevSI->isUnordered() &&
1484 equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1)) &&
1485 PrevSI->getValueOperand()->getType() ==
1486 SI.getValueOperand()->getType()) {
1487 ++NumDeadStore;
1488 // Manually add back the original store to the worklist now, so it will
1489 // be processed after the operands of the removed store, as this may
1490 // expose additional DSE opportunities.
1491 Worklist.push(&SI);
1492 eraseInstFromFunction(*PrevSI);
1493 return nullptr;
1494 }
1495 break;
1496 }
1497
1498 // If this is a load, we have to stop. However, if the loaded value is from
1499 // the pointer we're loading and is producing the pointer we're storing,
1500 // then *this* store is dead (X = load P; store X -> P).
1501 if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1502 if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1503 assert(SI.isUnordered() && "can't eliminate ordering operation");
1504 return eraseInstFromFunction(SI);
1505 }
1506
1507 // Otherwise, this is a load from some other location. Stores before it
1508 // may not be dead.
1509 break;
1510 }
1511
1512 // Don't skip over loads, throws or things that can modify memory.
1513 if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1514 break;
1515 }
1516
1517 // store X, null -> turns into 'unreachable' in SimplifyCFG
1518 // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1520 if (!isa<PoisonValue>(Val))
1521 return replaceOperand(SI, 0, PoisonValue::get(Val->getType()));
1522 return nullptr; // Do not modify these!
1523 }
1524
1525 // This is a non-terminator unreachable marker. Don't remove it.
1526 if (isa<UndefValue>(Ptr)) {
1527 // Remove guaranteed-to-transfer instructions before the marker.
1529
1530 // Remove all instructions after the marker and handle dead blocks this
1531 // implies.
1533 handleUnreachableFrom(SI.getNextNode(), Worklist);
1535 return nullptr;
1536 }
1537
1538 // store undef, Ptr -> noop
1539 // FIXME: This is technically incorrect because it might overwrite a poison
1540 // value. Change to PoisonValue once #52930 is resolved.
1541 if (isa<UndefValue>(Val))
1542 return eraseInstFromFunction(SI);
1543
1544 if (!NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
1545 if (Value *V = simplifyNonNullOperand(Ptr, /*HasDereferenceable=*/true))
1546 return replaceOperand(SI, 1, V);
1547
1548 return nullptr;
1549}
1550
1551/// Try to transform:
1552/// if () { *P = v1; } else { *P = v2 }
1553/// or:
1554/// *P = v1; if () { *P = v2; }
1555/// into a phi node with a store in the successor.
1557 if (!SI.isUnordered())
1558 return false; // This code has not been audited for volatile/ordered case.
1559
1560 // Check if the successor block has exactly 2 incoming edges.
1561 BasicBlock *StoreBB = SI.getParent();
1562 BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1563 if (!DestBB->hasNPredecessors(2))
1564 return false;
1565
1566 // Capture the other block (the block that doesn't contain our store).
1567 pred_iterator PredIter = pred_begin(DestBB);
1568 if (*PredIter == StoreBB)
1569 ++PredIter;
1570 BasicBlock *OtherBB = *PredIter;
1571
1572 // Bail out if all of the relevant blocks aren't distinct. This can happen,
1573 // for example, if SI is in an infinite loop.
1574 if (StoreBB == DestBB || OtherBB == DestBB)
1575 return false;
1576
1577 // Verify that the other block ends in a branch and is not otherwise empty.
1578 BasicBlock::iterator BBI(OtherBB->getTerminator());
1579 BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1580 if (!OtherBr || BBI == OtherBB->begin())
1581 return false;
1582
1583 auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool {
1584 if (!OtherStore ||
1585 OtherStore->getPointerOperand() != SI.getPointerOperand())
1586 return false;
1587
1588 auto *SIVTy = SI.getValueOperand()->getType();
1589 auto *OSVTy = OtherStore->getValueOperand()->getType();
1590 return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) &&
1591 SI.hasSameSpecialState(OtherStore);
1592 };
1593
1594 // If the other block ends in an unconditional branch, check for the 'if then
1595 // else' case. There is an instruction before the branch.
1596 StoreInst *OtherStore = nullptr;
1597 if (OtherBr->isUnconditional()) {
1598 --BBI;
1599 // Skip over debugging info and pseudo probes.
1600 while (BBI->isDebugOrPseudoInst()) {
1601 if (BBI==OtherBB->begin())
1602 return false;
1603 --BBI;
1604 }
1605 // If this isn't a store, isn't a store to the same location, or is not the
1606 // right kind of store, bail out.
1607 OtherStore = dyn_cast<StoreInst>(BBI);
1608 if (!OtherStoreIsMergeable(OtherStore))
1609 return false;
1610 } else {
1611 // Otherwise, the other block ended with a conditional branch. If one of the
1612 // destinations is StoreBB, then we have the if/then case.
1613 if (OtherBr->getSuccessor(0) != StoreBB &&
1614 OtherBr->getSuccessor(1) != StoreBB)
1615 return false;
1616
1617 // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1618 // if/then triangle. See if there is a store to the same ptr as SI that
1619 // lives in OtherBB.
1620 for (;; --BBI) {
1621 // Check to see if we find the matching store.
1622 OtherStore = dyn_cast<StoreInst>(BBI);
1623 if (OtherStoreIsMergeable(OtherStore))
1624 break;
1625
1626 // If we find something that may be using or overwriting the stored
1627 // value, or if we run out of instructions, we can't do the transform.
1628 if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1629 BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1630 return false;
1631 }
1632
1633 // In order to eliminate the store in OtherBr, we have to make sure nothing
1634 // reads or overwrites the stored value in StoreBB.
1635 for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1636 // FIXME: This should really be AA driven.
1637 if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1638 return false;
1639 }
1640 }
1641
1642 // Insert a PHI node now if we need it.
1643 Value *MergedVal = OtherStore->getValueOperand();
1644 // The debug locations of the original instructions might differ. Merge them.
1645 DebugLoc MergedLoc =
1646 DebugLoc::getMergedLocation(SI.getDebugLoc(), OtherStore->getDebugLoc());
1647 if (MergedVal != SI.getValueOperand()) {
1648 PHINode *PN =
1649 PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge");
1650 PN->addIncoming(SI.getValueOperand(), SI.getParent());
1651 Builder.SetInsertPoint(OtherStore);
1652 PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()),
1653 OtherBB);
1654 MergedVal = InsertNewInstBefore(PN, DestBB->begin());
1655 PN->setDebugLoc(MergedLoc);
1656 }
1657
1658 // Advance to a place where it is safe to insert the new store and insert it.
1659 BBI = DestBB->getFirstInsertionPt();
1660 StoreInst *NewSI =
1661 new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(),
1662 SI.getOrdering(), SI.getSyncScopeID());
1663 InsertNewInstBefore(NewSI, BBI);
1664 NewSI->setDebugLoc(MergedLoc);
1665 NewSI->mergeDIAssignID({&SI, OtherStore});
1666
1667 // If the two stores had AA tags, merge them.
1668 AAMDNodes AATags = SI.getAAMetadata();
1669 if (AATags)
1670 NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));
1671
1672 // Nuke the old stores.
1674 eraseInstFromFunction(*OtherStore);
1675 return true;
1676}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void addToWorklist(Instruction &I, SmallVector< Instruction *, 4 > &Worklist)
Hexagon Common GEP
IRTranslator LLVM IR MI
This file provides internal interfaces used to implement the InstCombine.
static StoreInst * combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI, Value *V)
Combine a store to a new type.
static Instruction * combineLoadToOperationType(InstCombinerImpl &IC, LoadInst &Load)
Combine loads to match the type of their uses' value after looking through intervening bitcasts.
static Instruction * replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr, Instruction &MemI)
static Instruction * simplifyAllocaArraySize(InstCombinerImpl &IC, AllocaInst &AI, DominatorTree &DT)
static bool canSimplifyNullStoreOrGEP(StoreInst &SI)
static bool equivalentAddressValues(Value *A, Value *B)
equivalentAddressValues - Test if A and B will obviously have the same value.
static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC, GetElementPtrInst *GEPI, Instruction *MemI, unsigned &Idx)
static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op)
static bool isSupportedAtomicType(Type *Ty)
static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI, const DataLayout &DL)
Returns true if V is dereferenceable for size of alloca.
static Instruction * unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI)
static cl::opt< unsigned > MaxCopiedFromConstantUsers("instcombine-max-copied-from-constant-users", cl::init(300), cl::desc("Maximum users to visit in copy from constant transform"), cl::Hidden)
static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI)
Combine stores to match the type of value being stored.
static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI)
static Value * likeBitCastFromVector(InstCombinerImpl &IC, Value *V)
Look for extractelement/insertvalue sequence that acts like a bitcast.
static bool isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V, MemTransferInst *&TheCopy, SmallVectorImpl< Instruction * > &ToDelete)
isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived) pointer to an alloca.
static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, const DataLayout &DL)
This file provides the interface for the instcombine pass implementation.
@ RecursionLimit
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
#define T
#define P(N)
This file defines the SmallString class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
bool isUsedWithInAlloca() const
Return true if this alloca is used as an inalloca argument to a call.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:470
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Conditional or Unconditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
LLVM_ABI IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
A debug info location.
Definition DebugLoc.h:123
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:179
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
LLVM_ABI bool isInBounds() const
Determine whether the GEP has the inbounds flag.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
Type * getSourceElementType() const
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Definition IRBuilder.h:1834
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2605
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1868
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2598
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1949
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:522
LLVM_ABI Value * CreateTypeSize(Type *Ty, TypeSize Size)
Create an expression which evaluates to the number of units in Size at runtime.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2246
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition IRBuilder.h:1887
Value * CreateInBoundsPtrAdd(Value *Ptr, Value *Offset, const Twine &Name="")
Definition IRBuilder.h:2027
LLVM_ABI CallInst * CreateMemTransferInst(Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size, bool isVolatile=false, const AAMDNodes &AAInfo=AAMDNodes())
void handleUnreachableFrom(Instruction *I, SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * visitLoadInst(LoadInst &LI)
void handlePotentiallyDeadBlocks(SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitStoreInst(StoreInst &SI)
bool mergeStoreIntoSuccessor(StoreInst &SI)
Try to transform: if () { *P = v1; } else { *P = v2 } or: *P = v1; if () { *P = v2; }...
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
bool removeInstructionsBeforeUnreachable(Instruction &I)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
Instruction * visitAllocSite(Instruction &FI)
Instruction * visitAllocaInst(AllocaInst &AI)
SimplifyQuery SQ
const DataLayout & getDataLayout() const
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
const DataLayout & DL
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
AssumptionCache & AC
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
DominatorTree & DT
BuilderTy & Builder
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI void mergeDIAssignID(ArrayRef< const Instruction * > SourceInstructions)
Merge the DIAssignID metadata from this instruction and those attached to instructions in SourceInstr...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
bool isUnordered() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
bool isSimple() const
Align getAlign() const
Return the alignment of the access that is being performed.
Metadata node.
Definition Metadata.h:1080
This class wraps the llvm.memcpy/memmove intrinsics.
static constexpr const unsigned PoisonGeneratingIDs[]
Metadata IDs that may generate poison.
Definition Metadata.h:146
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
Definition SetVector.h:252
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getZero()
Definition TypeSize.h:349
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:270
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition Type.h:200
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool match(Val *V, const Pattern &P)
auto m_Undef()
Match an arbitrary undef constant.
initializer< Ty > init(const Ty &Val)
LLVM_ABI bool isAvailable()
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, Align Alignment, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested.
Definition Loads.cpp:229
LLVM_ABI void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source)
Copy the metadata from the source instruction to the destination (the replacement for the source inst...
Definition Local.cpp:3107
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
LLVM_ABI Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition Loads.cpp:542
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition Local.cpp:1566
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
Definition Loads.cpp:435
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition Local.cpp:2414
LLVM_ABI Value * simplifyLoadInst(LoadInst *LI, Value *PtrOp, const SimplifyQuery &Q)
Given a load instruction and its pointer operand, fold the result or return null.
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3098
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1908
DWARFExpression::Operation Op
PredIterator< BasicBlock, Value::user_iterator > pred_iterator
Definition CFG.h:105
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
Definition Metadata.cpp:64
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108