//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken. If obviously true, it marks read/write globals as constant, deletes
// variables only stored to, etc.
//
//===----------------------------------------------------------------------===//
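//
// For illustration, a sketch of the simplest rewrite in this pass's spirit
// (hypothetical IR, not taken from an actual test): a global like
//
//   @g = internal global i32 42      ; loaded, never stored to
//
// becomes
//
//   @g = internal constant i32 42
//
// after which loads of @g can be folded to 42 by later passes.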
14
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Statistic.h"
21#include "llvm/ADT/Twine.h"
30#include "llvm/IR/Attributes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/CallingConv.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
40#include "llvm/IR/GlobalAlias.h"
41#include "llvm/IR/GlobalValue.h"
43#include "llvm/IR/IRBuilder.h"
44#include "llvm/IR/InstrTypes.h"
45#include "llvm/IR/Instruction.h"
48#include "llvm/IR/Module.h"
49#include "llvm/IR/Operator.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/Use.h"
52#include "llvm/IR/User.h"
53#include "llvm/IR/Value.h"
54#include "llvm/IR/ValueHandle.h"
58#include "llvm/Support/Debug.h"
61#include "llvm/Transforms/IPO.h"
66#include <cassert>
67#include <cstdint>
68#include <optional>
69#include <utility>
70#include <vector>
71
using namespace llvm;

#define DEBUG_TYPE "globalopt"

STATISTIC(NumMarked, "Number of globals marked constant");
STATISTIC(NumUnnamed, "Number of globals marked unnamed_addr");
STATISTIC(NumSRA, "Number of aggregate globals broken into scalars");
STATISTIC(NumSubstitute, "Number of globals with initializers stored into them");
STATISTIC(NumDeleted, "Number of globals deleted");
STATISTIC(NumGlobUses, "Number of global uses devirtualized");
STATISTIC(NumLocalized, "Number of globals localized");
STATISTIC(NumShrunkToBool, "Number of global vars shrunk to booleans");
STATISTIC(NumFastCallFns, "Number of functions converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved, "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
STATISTIC(NumAtExitRemoved, "Number of atexit handlers removed");
STATISTIC(NumInternalFunc, "Number of internal functions");
STATISTIC(NumColdCC, "Number of functions marked coldcc");
STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs");
STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed");

static cl::opt<bool>
    OptimizeNonFMVCallers("optimize-non-fmv-callers",
                          cl::desc("Statically resolve calls to versioned "
                                   "functions from non-versioned callers."),
                          cl::init(true), cl::Hidden);

static cl::opt<unsigned> MaxIFuncVersions(
    "max-ifunc-versions", cl::Hidden, cl::init(5),
    cl::desc("Maximum number of caller/callee versions that is allowed for "
             "using the expensive (cubic) static resolution algorithm."));

static cl::opt<bool>
    EnableColdCCStressTest("enable-coldcc-stress-test",
                           cl::desc("Enable stress test of coldcc by adding "
                                    "calling conv to all internal functions."),
                           cl::init(false), cl::Hidden);

static cl::opt<int> ColdCCRelFreq(
    "coldcc-rel-freq", cl::Hidden, cl::init(2),
    cl::desc(
        "Maximum block frequency, expressed as a percentage of caller's "
        "entry frequency, for a call site to be considered cold for enabling "
        "coldcc"));

/// Is this global variable possibly used by a leak checker as a root? If so,
/// we might not really want to eliminate the stores to it.
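/// For illustration (hypothetical IR): a global such as
///   @list = global ptr null
/// may be the only root of a malloc'd data structure that a leak checker
/// inspects at exit, so deleting stores to it could produce false leak
/// reports.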
static bool isLeakCheckerRoot(GlobalVariable *GV) {
  // A global variable is a root if it is a pointer, or could plausibly contain
  // a pointer. There are two challenges; one is that we could have a struct
  // that has an inner member which is a pointer. We recurse through the type
  // to detect these (up to a point). The other is that we may actually be a
  // union of a pointer and another type, and so our LLVM type is an integer
  // which gets converted into a pointer, or our type is an [i8 x #] with a
  // pointer potentially contained here.

  if (GV->hasPrivateLinkage())
    return false;

  SmallVector<Type *, 4> Types;
  Types.push_back(GV->getValueType());

  unsigned Limit = 20;
  do {
    Type *Ty = Types.pop_back_val();
    switch (Ty->getTypeID()) {
    default: break;
    case Type::PointerTyID:
      return true;
    case Type::FixedVectorTyID:
    case Type::ScalableVectorTyID:
      if (cast<VectorType>(Ty)->getElementType()->isPointerTy())
        return true;
      break;
    case Type::ArrayTyID:
      Types.push_back(cast<ArrayType>(Ty)->getElementType());
      break;
    case Type::StructTyID: {
      StructType *STy = cast<StructType>(Ty);
      if (STy->isOpaque()) return true;
      for (Type *InnerTy : STy->elements()) {
        if (isa<PointerType>(InnerTy)) return true;
        if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) ||
            isa<VectorType>(InnerTy))
          Types.push_back(InnerTy);
      }
      break;
    }
    }
    if (--Limit == 0) return true;
  } while (!Types.empty());
  return false;
}

/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
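/// For illustration (hypothetical IR): in
///   %p = call ptr @malloc(i64 16)
///   %q = getelementptr inbounds i8, ptr %p, i64 8
///   store ptr %q, ptr @g
/// the store, the GEP, and the allocation form a single-use chain that can be
/// deleted together when @g is never read.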
static bool IsSafeComputationToRemove(
    Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  do {
    if (isa<Constant>(V))
      return true;
    if (!V->hasOneUse())
      return false;
    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
        isa<GlobalValue>(V))
      return false;
    if (isAllocationFn(V, GetTLI))
      return true;

    Instruction *I = cast<Instruction>(V);
    if (I->mayHaveSideEffects())
      return false;
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
      if (!GEP->hasAllConstantIndices())
        return false;
    } else if (I->getNumOperands() != 1) {
      return false;
    }

    V = I->getOperand(0);
  } while (true);
}

/// This GV is a pointer root. Loop over all users of the global and clean up
/// the obvious ones: a store of a constant is deleted outright, and a store
/// of an otherwise-unused allocation is deleted together with its chain of
/// computation.
static bool
CleanupPointerRootUsers(GlobalVariable *GV,
                        function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  // A brief explanation of leak checkers. The goal is to find bugs where
  // pointers are forgotten, causing an accumulating growth in memory
  // usage over time. The common strategy for leak checkers is to explicitly
  // allow the memory pointed to by globals at exit. This is popular because it
  // also solves another problem where the main thread of a C++ program may shut
  // down before other threads that are still expecting to use those globals. To
  // handle that case, we expect the program may create a singleton and never
  // destroy it.

  bool Changed = false;

  // If Dead[n].first is the only use of a malloc result, we can delete its
  // chain of computation and the store to the global in Dead[n].second.
  SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;

  SmallVector<User *> Worklist(GV->users());
  // Constants can't be pointers to dynamically allocated memory.
  while (!Worklist.empty()) {
    User *U = Worklist.pop_back_val();
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      Value *V = SI->getValueOperand();
      if (isa<Constant>(V)) {
        Changed = true;
        SI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(V)) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, SI));
      }
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
      if (isa<Constant>(MSI->getValue())) {
        Changed = true;
        MSI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MSI));
      }
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
      GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
      if (MemSrc && MemSrc->isConstant()) {
        Changed = true;
        MTI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MTI));
      }
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (isa<GEPOperator>(CE))
        append_range(Worklist, CE->users());
    }
  }

  for (const auto &[Inst, Store] : Dead) {
    if (IsSafeComputationToRemove(Inst, GetTLI)) {
      Store->eraseFromParent();
      Instruction *I = Inst;
      do {
        if (isAllocationFn(I, GetTLI))
          break;
        Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
        if (!J)
          break;
        I->eraseFromParent();
        I = J;
      } while (true);
      I->eraseFromParent();
      Changed = true;
    }
  }

  GV->removeDeadConstantUsers();
  return Changed;
}

/// We just marked GV constant. Loop over all users of the global, cleaning up
/// the obvious ones. This is largely just a quick scan over the use list to
/// clean up the easy and obvious cruft. This returns true if it made a change.
static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
                                       const DataLayout &DL) {
  Constant *Init = GV->getInitializer();
  SmallVector<User *, 8> WorkList(GV->users());
  SmallPtrSet<User *, 8> Visited;
  bool Changed = false;

  SmallVector<WeakTrackingVH> MaybeDeadInsts;
  auto EraseFromParent = [&](Instruction *I) {
    for (Value *Op : I->operands())
      if (auto *OpI = dyn_cast<Instruction>(Op))
        MaybeDeadInsts.push_back(OpI);
    I->eraseFromParent();
    Changed = true;
  };
  while (!WorkList.empty()) {
    User *U = WorkList.pop_back_val();
    if (!Visited.insert(U).second)
      continue;

    if (auto *BO = dyn_cast<BitCastOperator>(U))
      append_range(WorkList, BO->users());
    if (auto *ASC = dyn_cast<AddrSpaceCastOperator>(U))
      append_range(WorkList, ASC->users());
    else if (auto *GEP = dyn_cast<GEPOperator>(U))
      append_range(WorkList, GEP->users());
    else if (auto *LI = dyn_cast<LoadInst>(U)) {
      // A load from a uniform value is always the same, regardless of any
      // applied offset.
      Type *Ty = LI->getType();
      if (Constant *Res = ConstantFoldLoadFromUniformValue(Init, Ty, DL)) {
        LI->replaceAllUsesWith(Res);
        EraseFromParent(LI);
        continue;
      }

      Value *PtrOp = LI->getPointerOperand();
      APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
      PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
          DL, Offset, /* AllowNonInbounds */ true);
      if (auto *II = dyn_cast<IntrinsicInst>(PtrOp)) {
        if (II->getIntrinsicID() == Intrinsic::threadlocal_address)
          PtrOp = II->getArgOperand(0);
      }
      if (PtrOp == GV) {
        if (auto *Value = ConstantFoldLoadFromConst(Init, Ty, Offset, DL)) {
          LI->replaceAllUsesWith(Value);
          EraseFromParent(LI);
        }
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // Store must be unreachable or storing Init into the global.
      EraseFromParent(SI);
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
      if (getUnderlyingObject(MI->getRawDest()) == GV)
        EraseFromParent(MI);
    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      if (II->getIntrinsicID() == Intrinsic::threadlocal_address)
        append_range(WorkList, II->users());
    }
  }

  Changed |=
      RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts);
  GV->removeDeadConstantUsers();
  return Changed;
}

/// Part of the global at a specific offset, which is only accessed through
/// loads and stores with the given type.
struct GlobalPart {
  Type *Ty;
  Constant *Initializer = nullptr;
  bool IsLoaded = false;
  bool IsStored = false;
};

/// Look at all uses of the global and determine which (offset, type) pairs it
/// can be split into.
static bool collectSRATypes(DenseMap<uint64_t, GlobalPart> &Parts,
                            GlobalVariable *GV, const DataLayout &DL) {
  SmallVector<Use *, 16> Worklist;
  SmallPtrSet<Use *, 16> Visited;
  auto AppendUses = [&](Value *V) {
    for (Use &U : V->uses())
      if (Visited.insert(&U).second)
        Worklist.push_back(&U);
  };
  AppendUses(GV);
  while (!Worklist.empty()) {
    Use *U = Worklist.pop_back_val();
    User *V = U->getUser();

    auto *GEP = dyn_cast<GEPOperator>(V);
    if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
        (GEP && GEP->hasAllConstantIndices())) {
      AppendUses(V);
      continue;
    }

    if (Value *Ptr = getLoadStorePointerOperand(V)) {
      // This is storing the global address into somewhere, not storing into
      // the global.
      if (isa<StoreInst>(V) && U->getOperandNo() == 0)
        return false;

      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      if (Ptr != GV || Offset.getActiveBits() >= 64)
        return false;

      // TODO: We currently require that all accesses at a given offset must
      // use the same type. This could be relaxed.
      Type *Ty = getLoadStoreType(V);
      const auto &[It, Inserted] =
          Parts.try_emplace(Offset.getZExtValue(), GlobalPart{Ty});
      if (Ty != It->second.Ty)
        return false;

      if (Inserted) {
        It->second.Initializer =
            ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL);
        if (!It->second.Initializer) {
          LLVM_DEBUG(dbgs() << "Global SRA: Failed to evaluate initializer of "
                            << *GV << " with type " << *Ty << " at offset "
                            << Offset.getZExtValue());
          return false;
        }
      }

      // Scalable types not currently supported.
      if (Ty->isScalableTy())
        return false;

      auto IsStored = [](Value *V, Constant *Initializer) {
        auto *SI = dyn_cast<StoreInst>(V);
        if (!SI)
          return false;

        Constant *StoredConst = dyn_cast<Constant>(SI->getOperand(0));
        if (!StoredConst)
          return true;

        // Don't consider stores that only write the initializer value.
        return Initializer != StoredConst;
      };

      It->second.IsLoaded |= isa<LoadInst>(V);
      It->second.IsStored |= IsStored(V, It->second.Initializer);
      continue;
    }

    // Ignore dead constant users.
    if (auto *C = dyn_cast<Constant>(V)) {
      if (!isSafeToDestroyConstant(C))
        return false;
      continue;
    }

    // Unknown user.
    return false;
  }

  return true;
}

/// Copy over the debug info for a variable to its SRA replacements.
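/// For illustration: if a 64-bit variable is split into two 32-bit
/// replacements, each replacement would carry the original DIGlobalVariable
/// behind a fragment expression covering bits [0, 32) or [32, 64)
/// (hypothetical sizes).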
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
                                 uint64_t FragmentOffsetInBits,
                                 uint64_t FragmentSizeInBits,
                                 uint64_t VarSize) {
  SmallVector<DIGlobalVariableExpression *, 1> GVs;
  GV->getDebugInfo(GVs);
  for (auto *GVE : GVs) {
    DIVariable *Var = GVE->getVariable();
    DIExpression *Expr = GVE->getExpression();
    int64_t CurVarOffsetInBytes = 0;
    uint64_t CurVarOffsetInBits = 0;
    uint64_t FragmentEndInBits = FragmentOffsetInBits + FragmentSizeInBits;

    // Calculate the offset (in bytes); continue if it is unknown.
    if (!Expr->extractIfOffset(CurVarOffsetInBytes))
      continue;

    // Ignore negative offsets.
    if (CurVarOffsetInBytes < 0)
      continue;

    // Convert the offset to bits.
    CurVarOffsetInBits = CHAR_BIT * (uint64_t)CurVarOffsetInBytes;

    // The current variable starts after the fragment; ignore it.
    if (CurVarOffsetInBits >= FragmentEndInBits)
      continue;

    uint64_t CurVarSize = Var->getType()->getSizeInBits();
    uint64_t CurVarEndInBits = CurVarOffsetInBits + CurVarSize;
    // The current variable ends before the start of the fragment; ignore it.
    if (CurVarSize != 0 && /* CurVarSize is known */
        CurVarEndInBits <= FragmentOffsetInBits)
      continue;

    // The current variable fits in (is not greater than) the fragment and
    // does not need a fragment expression.
    if (CurVarSize != 0 && /* CurVarSize is known */
        CurVarOffsetInBits >= FragmentOffsetInBits &&
        CurVarEndInBits <= FragmentEndInBits) {
      uint64_t CurVarOffsetInFragment =
          (CurVarOffsetInBits - FragmentOffsetInBits) / 8;
      if (CurVarOffsetInFragment != 0)
        Expr = DIExpression::get(Expr->getContext(), {dwarf::DW_OP_plus_uconst,
                                                      CurVarOffsetInFragment});
      else
        Expr = DIExpression::get(Expr->getContext(), {});
      auto *NGVE =
          DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
      NGV->addDebugInfo(NGVE);
      continue;
    }
    // The current variable does not fit in a single fragment; emit a fragment
    // expression.
    if (FragmentSizeInBits < VarSize) {
      if (CurVarOffsetInBits > FragmentOffsetInBits)
        continue;
      uint64_t CurVarFragmentOffsetInBits =
          FragmentOffsetInBits - CurVarOffsetInBits;
      uint64_t CurVarFragmentSizeInBits = FragmentSizeInBits;
      if (CurVarSize != 0 && CurVarEndInBits < FragmentEndInBits)
        CurVarFragmentSizeInBits -= (FragmentEndInBits - CurVarEndInBits);
      if (CurVarOffsetInBits)
        Expr = DIExpression::get(Expr->getContext(), {});
      if (auto E = DIExpression::createFragmentExpression(
              Expr, CurVarFragmentOffsetInBits, CurVarFragmentSizeInBits))
        Expr = *E;
      else
        continue;
    }
    auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
    NGV->addDebugInfo(NGVE);
  }
}

/// Perform scalar replacement of aggregates on the specified global variable.
/// This opens the door for other optimizations by exposing the behavior of the
/// program in a more fine-grained way. We have determined that this
/// transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.
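/// For illustration (hypothetical IR): SRA rewrites
///   @g = internal global { i32, i32 } zeroinitializer
/// into two independent globals
///   @g.0 = internal global i32 0
///   @g.1 = internal global i32 0
/// and redirects each load/store to the replacement at the matching offset.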
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
  assert(GV->hasLocalLinkage());

  // Collect types to split into.
  DenseMap<uint64_t, GlobalPart> Parts;
  if (!collectSRATypes(Parts, GV, DL) || Parts.empty())
    return nullptr;

  // Make sure we don't SRA back to the same type.
  if (Parts.size() == 1 && Parts.begin()->second.Ty == GV->getValueType())
    return nullptr;

  // Don't perform SRA if we would have to split into many globals. Ignore
  // parts that are either only loaded or only stored, because we expect them
  // to be optimized away.
  unsigned NumParts = count_if(Parts, [](const auto &Pair) {
    return Pair.second.IsLoaded && Pair.second.IsStored;
  });
  if (NumParts > 16)
    return nullptr;

  // Sort by offset.
  SmallVector<std::tuple<uint64_t, Type *, Constant *>, 16> TypesVector;
  for (const auto &Pair : Parts) {
    TypesVector.push_back(
        {Pair.first, Pair.second.Ty, Pair.second.Initializer});
  }
  sort(TypesVector, llvm::less_first());

  // Check that the types are non-overlapping.
  uint64_t Offset = 0;
  for (const auto &[OffsetForTy, Ty, _] : TypesVector) {
    // Overlaps with previous type.
    if (OffsetForTy < Offset)
      return nullptr;

    Offset = OffsetForTy + DL.getTypeAllocSize(Ty);
  }

  // Some accesses go beyond the end of the global, don't bother.
  if (Offset > DL.getTypeAllocSize(GV->getValueType()))
    return nullptr;

  LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");

  // Get the alignment of the global, either explicit or target-specific.
  Align StartAlignment =
      DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
  uint64_t VarSize = DL.getTypeSizeInBits(GV->getValueType());

  // Create replacement globals.
  DenseMap<uint64_t, GlobalVariable *> NewGlobals;
  unsigned NameSuffix = 0;
  for (auto &[OffsetForTy, Ty, Initializer] : TypesVector) {
    GlobalVariable *NGV = new GlobalVariable(
        *GV->getParent(), Ty, false, GlobalValue::InternalLinkage,
        Initializer, GV->getName() + "." + Twine(NameSuffix++), GV,
        GV->getThreadLocalMode(), GV->getAddressSpace());
    // Start out by copying attributes from the original, including alignment.
    NGV->copyAttributesFrom(GV);
    NewGlobals.insert({OffsetForTy, NGV});

    // Calculate the known alignment of the field. If the original aggregate
    // had 256 byte alignment for example, then the element at a given offset
    // may also have a known alignment, and something might depend on that:
    // propagate info to each field.
    Align NewAlign = commonAlignment(StartAlignment, OffsetForTy);
    NGV->setAlignment(NewAlign);

    // Copy over the debug info for the variable.
    transferSRADebugInfo(GV, NGV, OffsetForTy * 8,
                         DL.getTypeAllocSizeInBits(Ty), VarSize);
  }

  // Replace uses of the original global with uses of the new global.
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 16> Visited;
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  auto AppendUsers = [&](Value *V) {
    for (User *U : V->users())
      if (Visited.insert(U).second)
        Worklist.push_back(U);
  };
  AppendUsers(GV);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
        isa<GEPOperator>(V)) {
      AppendUsers(V);
      if (isa<Instruction>(V))
        DeadInsts.push_back(V);
      continue;
    }

    if (Value *Ptr = getLoadStorePointerOperand(V)) {
      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      assert(Ptr == GV && "Load/store must be from/to global");
      GlobalVariable *NGV = NewGlobals[Offset.getZExtValue()];
      assert(NGV && "Must have replacement global for this offset");

      // Update the pointer operand and recalculate alignment.
      Align PrefAlign = DL.getPrefTypeAlign(getLoadStoreType(V));
      Align NewAlign =
          getOrEnforceKnownAlignment(NGV, PrefAlign, DL, cast<Instruction>(V));

      if (auto *LI = dyn_cast<LoadInst>(V)) {
        LI->setOperand(0, NGV);
        LI->setAlignment(NewAlign);
      } else {
        auto *SI = cast<StoreInst>(V);
        SI->setOperand(1, NGV);
        SI->setAlignment(NewAlign);
      }
      continue;
    }

    assert(isSafeToDestroyConstant(cast<Constant>(V)) &&
           "Other users can only be dead constants");
  }

  // Delete old instructions and global.
  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
  GV->removeDeadConstantUsers();
  GV->eraseFromParent();
  ++NumSRA;

  assert(NewGlobals.size() > 0);
  return NewGlobals.begin()->second;
}

/// Return true if all users of the specified value will trap if the value is
/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid
/// reprocessing them.
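/// For illustration (hypothetical IR): these uses trap when %p is null:
///   %v = load i32, ptr %p       ; dereference
///   call void %p()              ; indirect call through %p
/// whereas "store ptr %p, ptr @x" merely escapes %p and does not qualify.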
static bool
AllUsesOfValueWillTrapIfNull(const Value *V,
                             SmallPtrSetImpl<const PHINode *> &PHIs) {
  for (const User *U : V->users()) {
    if (const Instruction *I = dyn_cast<Instruction>(U)) {
      // If null pointer is considered valid, then all uses are non-trapping.
      // Non address-space 0 globals have already been pruned by the caller.
      if (NullPointerIsDefined(I->getFunction()))
        return false;
    }
    if (isa<LoadInst>(U)) {
      // Will trap.
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == V) {
        return false; // Storing the value.
      }
    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
      if (CI->getCalledOperand() != V) {
        return false; // Not calling the ptr
      }
    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
      if (II->getCalledOperand() != V) {
        return false; // Not calling the ptr
      }
    } else if (const AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs))
        return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
      // If we've already seen this phi node, ignore it, it has already been
      // checked.
      if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
        return false;
    } else if (isa<ICmpInst>(U) &&
               !ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) &&
               isa<LoadInst>(U->getOperand(0)) &&
               isa<ConstantPointerNull>(U->getOperand(1))) {
      assert(isa<GlobalValue>(cast<LoadInst>(U->getOperand(0))
                                  ->getPointerOperand()
                                  ->stripPointerCasts()) &&
             "Should be GlobalVariable");
      // This and only this kind of non-signed ICmpInst is to be replaced with
      // the comparing of the value of the created global init bool later in
      // optimizeGlobalAddressOfAllocation for the global variable.
    } else {
      return false;
    }
  }
  return true;
}

/// Return true if all uses of any loads from GV will trap if the loaded value
/// is null. Note that this also permits comparisons of the loaded value
/// against null, as a special case.
static bool allUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
  SmallVector<const Value *, 4> Worklist;
  Worklist.push_back(GV);
  while (!Worklist.empty()) {
    const Value *P = Worklist.pop_back_val();
    for (const auto *U : P->users()) {
      if (auto *LI = dyn_cast<LoadInst>(U)) {
        if (!LI->isSimple())
          return false;
        SmallPtrSet<const PHINode *, 8> PHIs;
        if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
          return false;
      } else if (auto *SI = dyn_cast<StoreInst>(U)) {
        if (!SI->isSimple())
          return false;
        // Ignore stores to the global.
        if (SI->getPointerOperand() != P)
          return false;
      } else if (auto *CE = dyn_cast<ConstantExpr>(U)) {
        if (CE->stripPointerCasts() != GV)
          return false;
        // Check the ConstantExpr further.
        Worklist.push_back(CE);
      } else {
        // We don't know or understand this user, bail out.
        return false;
      }
    }
  }

  return true;
}

/// Get all the load/store uses for global variable \p GV.
static void allUsesOfLoadAndStores(GlobalVariable *GV,
                                   SmallVector<Value *, 4> &Uses) {
  SmallVector<Value *, 4> Worklist;
  Worklist.push_back(GV);
  while (!Worklist.empty()) {
    auto *P = Worklist.pop_back_val();
    for (auto *U : P->users()) {
      if (auto *CE = dyn_cast<ConstantExpr>(U)) {
        Worklist.push_back(CE);
        continue;
      }

      assert((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
             "Expect only load or store instructions");
      Uses.push_back(U);
    }
  }
}

static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
  bool Changed = false;
  for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
    Instruction *I = cast<Instruction>(*UI++);
    // Uses are non-trapping if null pointer is considered valid.
    // Non address-space 0 globals are already pruned by the caller.
    if (NullPointerIsDefined(I->getFunction()))
      return false;
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      LI->setOperand(0, NewV);
      Changed = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (SI->getOperand(1) == V) {
        SI->setOperand(1, NewV);
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      CallBase *CB = cast<CallBase>(I);
      if (CB->getCalledOperand() == V) {
        // Calling through the pointer! Turn into a direct call, but be careful
        // that the pointer is not also being passed as an argument.
        CB->setCalledOperand(NewV);
        Changed = true;
        bool PassedAsArg = false;
        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
          if (CB->getArgOperand(i) == V) {
            PassedAsArg = true;
            CB->setArgOperand(i, NewV);
          }

        if (PassedAsArg) {
          // Being passed as an argument also. Be careful to not invalidate UI!
          UI = V->user_begin();
        }
      }
    } else if (AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(I)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(
          CI, ConstantExpr::getAddrSpaceCast(NewV, CI->getType()));
      if (CI->use_empty()) {
        Changed = true;
        CI->eraseFromParent();
      }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      // Should handle GEP here.
      SmallVector<Constant *, 8> Idxs;
      Idxs.reserve(GEPI->getNumOperands() - 1);
      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
           i != e; ++i)
        if (Constant *C = dyn_cast<Constant>(*i))
          Idxs.push_back(C);
        else
          break;
      if (Idxs.size() == GEPI->getNumOperands() - 1)
        Changed |= OptimizeAwayTrappingUsesOfValue(
            GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
                                                 NewV, Idxs));
      if (GEPI->use_empty()) {
        Changed = true;
        GEPI->eraseFromParent();
      }
    }
  }

  return Changed;
}

/// The specified global has only one non-null value stored into it. If there
/// are uses of the loaded value that would trap if the loaded value is
/// dynamically null, then we know that they cannot be reachable with a null
/// value, so we can optimize away the load.
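/// For illustration (hypothetical IR): if @f is the only value ever stored to
/// @g, then in
///   %fp = load ptr, ptr @g
///   call void %fp()
/// the call would trap if %fp were null, so it can be rewritten to
///   call void @f()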
static bool OptimizeAwayTrappingUsesOfLoads(
    GlobalVariable *GV, Constant *LV, const DataLayout &DL,
    function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  bool Changed = false;

  // Keep track of whether we are able to remove all the uses of the global
  // other than the store that defines it.
  bool AllNonStoreUsesGone = true;

  // Replace all uses of loads with uses of uses of the stored value.
  for (User *GlobalUser : llvm::make_early_inc_range(GV->users())) {
    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
      // If we were able to delete all uses of the loads
      if (LI->use_empty()) {
        LI->eraseFromParent();
        Changed = true;
      } else {
        AllNonStoreUsesGone = false;
      }
    } else if (isa<StoreInst>(GlobalUser)) {
      // Ignore the store that stores "LV" to the global.
      assert(GlobalUser->getOperand(1) == GV &&
             "Must be storing *to* the global");
    } else {
      AllNonStoreUsesGone = false;

      // If we get here we could have other crazy uses that are transitively
      // loaded.
      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
              isa<BitCastInst>(GlobalUser) ||
              isa<GetElementPtrInst>(GlobalUser) ||
              isa<AddrSpaceCastInst>(GlobalUser)) &&
             "Only expect load and stores!");
    }
  }

  if (Changed) {
    LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
                      << "\n");
    ++NumGlobUses;
  }

  // If we nuked all of the loads, then none of the stores are needed either,
  // nor is the global.
  if (AllNonStoreUsesGone) {
    if (isLeakCheckerRoot(GV)) {
      Changed |= CleanupPointerRootUsers(GV, GetTLI);
    } else {
      Changed = true;
      CleanupConstantGlobalUsers(GV, DL);
    }
    if (GV->use_empty()) {
      LLVM_DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
      Changed = true;
      GV->eraseFromParent();
      ++NumDeleted;
    }
  }
  return Changed;
}

/// Walk the use list of V, constant folding all of the instructions that are
/// foldable.
static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
                                TargetLibraryInfo *TLI) {
  for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
    if (Instruction *I = dyn_cast<Instruction>(*UI++))
      if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
        I->replaceAllUsesWith(NewC);

        // Advance UI to the next non-I use to avoid invalidating it!
        // Instructions could multiply use V.
        while (UI != E && *UI == I)
          ++UI;
        if (isInstructionTriviallyDead(I, TLI))
          I->eraseFromParent();
      }
}

/// This function takes the specified global variable, and transforms the
/// program as if it always contained the result of the specified malloc.
/// Because it is always the result of the specified malloc, there is no reason
/// to actually DO the malloc. Instead, turn the malloc into a global, and
/// rewrite any loads of GV as uses of the new global.
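/// For illustration (hypothetical IR): the rewrite turns
///   @g = internal global ptr null
///   %m = call ptr @malloc(i64 8)
///   store ptr %m, ptr @g
/// into a static body plus an init flag:
///   @g.body = internal global [8 x i8] undef
///   @g.init = internal global i1 false
/// where loads of @g become @g.body and null-comparisons become loads of
/// @g.init.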
static GlobalVariable *
OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
                                  uint64_t AllocSize, Constant *InitVal,
                                  const DataLayout &DL,
                                  TargetLibraryInfo *TLI) {
  LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI
                    << '\n');

  // Create global of type [AllocSize x i8].
  Type *GlobalType = ArrayType::get(Type::getInt8Ty(GV->getContext()),
                                    AllocSize);

  // Create the new global variable. The contents of the allocated memory is
  // undefined initially, so initialize with an undef value.
  GlobalVariable *NewGV = new GlobalVariable(
      *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
      UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
      GV->getThreadLocalMode());

  // Initialize the global at the point of the original call. Note that this
  // is a different point from the initialization referred to below for the
  // nullability handling. Subtlety: We have not proven the original global was
  // only initialized once. As such, we can not fold this into the initializer
  // of the new global as we may need to re-init the storage multiple times.
  if (!isa<UndefValue>(InitVal)) {
    IRBuilder<> Builder(CI->getNextNode());
    // TODO: Use alignment above if align!=1
    Builder.CreateMemSet(NewGV, InitVal, AllocSize, std::nullopt);
  }

  // Update users of the allocation to use the new global instead.
  CI->replaceAllUsesWith(NewGV);

  // If there is a comparison against null, we will insert a global bool to
  // keep track of whether the global was initialized yet or not.
  GlobalVariable *InitBool = new GlobalVariable(
      Type::getInt1Ty(GV->getContext()), false, GlobalValue::InternalLinkage,
      ConstantInt::getFalse(GV->getContext()), GV->getName() + ".init",
      GV->getThreadLocalMode());
  bool InitBoolUsed = false;

  // Loop over all instruction uses of GV, processing them in turn.
  SmallVector<Value *, 4> Guses;
  allUsesOfLoadAndStores(GV, Guses);
  for (auto *U : Guses) {
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // The global is initialized when the store to it occurs. If the stored
      // value is null value, the global bool is set to false, otherwise true.
      auto *NewSI = new StoreInst(
          ConstantInt::getBool(GV->getContext(), !isa<ConstantPointerNull>(
                                                     SI->getValueOperand())),
          InitBool, false, Align(1), SI->getOrdering(), SI->getSyncScopeID(),
          SI->getIterator());
      NewSI->setDebugLoc(SI->getDebugLoc());
      SI->eraseFromParent();
      continue;
    }

    LoadInst *LI = cast<LoadInst>(U);
    while (!LI->use_empty()) {
      Use &LoadUse = *LI->use_begin();
      ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
      if (!ICI) {
        LoadUse.set(NewGV);
        continue;
      }

      // Replace the cmp X, 0 with a use of the bool value.
      Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
                               InitBool->getName() + ".val", false, Align(1),
                               LI->getOrdering(), LI->getSyncScopeID(),
                               LI->getIterator());
      // FIXME: Should we use the DebugLoc of the load used by the predicate, or
      // the predicate? The load seems most appropriate, but there's an argument
      // that the new load does not represent the old load, but is simply a
      // component of recomputing the predicate.
      cast<LoadInst>(LV)->setDebugLoc(LI->getDebugLoc());
      InitBoolUsed = true;
      switch (ICI->getPredicate()) {
      default: llvm_unreachable("Unknown ICmp Predicate!");
      case ICmpInst::ICMP_ULT: // X < null -> always false
        LV = ConstantInt::getFalse(GV->getContext());
        break;
      case ICmpInst::ICMP_UGE: // X >= null -> always true
        LV = ConstantInt::getTrue(GV->getContext());
        break;
      case ICmpInst::ICMP_ULE:
      case ICmpInst::ICMP_EQ:
        LV = BinaryOperator::CreateNot(LV, "notinit", ICI->getIterator());
        cast<BinaryOperator>(LV)->setDebugLoc(ICI->getDebugLoc());
        break;
      case ICmpInst::ICMP_NE:
      case ICmpInst::ICMP_UGT:
        break; // no change.
      }
      ICI->replaceAllUsesWith(LV);
      ICI->eraseFromParent();
    }
    LI->eraseFromParent();
  }

  // If the initialization boolean was used, insert it, otherwise delete it.
  if (!InitBoolUsed) {
    while (!InitBool->use_empty()) // Delete initializations
      cast<StoreInst>(InitBool->user_back())->eraseFromParent();
    delete InitBool;
  } else
    GV->getParent()->insertGlobalVariable(GV->getIterator(), InitBool);

  // Now the GV is dead, nuke it and the allocation.
  GV->eraseFromParent();
  CI->eraseFromParent();

  // To further other optimizations, loop over all users of NewGV and try to
  // constant prop them. This will promote GEP instructions with constant
  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
  ConstantPropUsersOf(NewGV, DL, TLI);

  return NewGV;
}

/// Scan the use-list of GV checking to make sure that there are no complex
/// uses of GV. We permit simple things like dereferencing the pointer, but not
/// storing through the address, unless it is to the specified global.
static bool
valueIsOnlyUsedLocallyOrStoredInOneGlobal(const CallInst *CI,
                                          const GlobalVariable *GV) {
  SmallPtrSet<const Value *, 4> Visited;
  SmallVector<const Value *, 4> Worklist;
  Worklist.push_back(CI);

  while (!Worklist.empty()) {
    const Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    for (const Use &VUse : V->uses()) {
      const User *U = VUse.getUser();
      if (isa<LoadInst>(U) || isa<CmpInst>(U))
        continue; // Fine, ignore.

      if (auto *SI = dyn_cast<StoreInst>(U)) {
        if (SI->getValueOperand() == V &&
            SI->getPointerOperand()->stripPointerCasts() != GV)
          return false; // Storing the pointer not into GV... bad.
        continue; // Otherwise, storing through it, or storing into GV... fine.
      }

      if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
        Worklist.push_back(GEPI);
        continue;
      }

      return false;
    }
  }

  return true;
}

/// If we have a global that is only initialized with a fixed size allocation
/// try to transform the program to use global memory instead of heap
/// allocated memory. This eliminates dynamic allocation, avoids an indirection
/// accessing the data, and exposes the resultant global to further GlobalOpt.
static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
                                                   CallInst *CI,
                                                   const DataLayout &DL,
                                                   TargetLibraryInfo *TLI) {
  if (!isRemovableAlloc(CI, TLI))
    // Must be able to remove the call when we get done.
    return false;

  Type *Int8Ty = Type::getInt8Ty(CI->getFunction()->getContext());
  Constant *InitVal = getInitialValueOfAllocation(CI, TLI, Int8Ty);
  if (!InitVal)
    // Must be able to emit a memset for initialization
    return false;

  uint64_t AllocSize;
  if (!getObjectSize(CI, AllocSize, DL, TLI, ObjectSizeOpts()))
    return false;

  // Restrict this transformation to only working on small allocations
  // (2048 bytes currently), as we don't want to introduce a 16M global or
  // something.
  if (AllocSize >= 2048)
    return false;

  // We can't optimize this global unless all uses of it are *known* to be
  // of the malloc value, not of the null initializer value (consider a use
  // that compares the global's value against zero to see if the malloc has
  // been reached). To do this, we check to see if all uses of the global
  // would trap if the global were null: this proves that they must all
  // happen after the malloc.
  if (!allUsesOfLoadedValueWillTrapIfNull(GV))
    return false;

  // We can't optimize this if the malloc itself is used in a complex way,
  // for example, being stored into multiple globals. This allows the
  // malloc to be stored into the specified global, loaded, gep, icmp'd.
  // These are all things we could transform to using the global for.
  if (!valueIsOnlyUsedLocallyOrStoredInOneGlobal(CI, GV))
    return false;

  OptimizeGlobalAddressOfAllocation(GV, CI, AllocSize, InitVal, DL, TLI);
  return true;
}

// Try to optimize globals based on the knowledge that only one value (besides
// its initializer) is ever stored to the global.
static bool
optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
                         const DataLayout &DL,
                         function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  // If we are dealing with a pointer global that is initialized to null and
  // only has one (non-null) value stored into it, then we can optimize any
  // users of the loaded value (often calls and loads) that would trap if the
  // value was null.
  if (GV->getInitializer()->getType()->isPointerTy() &&
      GV->getInitializer()->isNullValue() &&
      StoredOnceVal->getType()->isPointerTy() &&
      !NullPointerIsDefined(
          nullptr /* F */,
          GV->getType()->getPointerAddressSpace())) {
    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
      // Optimize away any trapping uses of the loaded value.
      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI))
        return true;
    } else if (isAllocationFn(StoredOnceVal, GetTLI)) {
      if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) {
        auto *TLI = &GetTLI(*CI->getFunction());
        if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, DL, TLI))
          return true;
      }
    }
  }

  return false;
}

/// At this point, we have learned that the only two values ever stored into GV
/// are its initializer and OtherVal. See if we can shrink the global into a
/// boolean and select between the two values whenever it is used. This exposes
/// the values to other scalar optimizations.
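/// For illustration (hypothetical IR): if @g is initialized to 0 and only 42
/// is ever stored into it, @g shrinks to
///   @g.b = internal global i1 false
/// stores become "store i1 true, ptr @g.b", and loads become
///   %b = load i1, ptr @g.b
///   %v = select i1 %b, i32 42, i32 0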
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
  Type *GVElType = GV->getValueType();

  // If GVElType is already i1, it is already shrunk. If the type of the GV is
  // an FP value, pointer or vector, don't do this optimization because a select
  // between them is very expensive and unlikely to lead to later
  // simplification. In these cases, we typically end up with "cond ? v1 : v2"
  // where v1 and v2 both require constant pool loads, a big loss.
  if (GVElType == Type::getInt1Ty(GV->getContext()) ||
      GVElType->isFloatingPointTy() ||
      GVElType->isPointerTy() || GVElType->isVectorTy())
    return false;

  // Walk the use list of the global seeing if all the uses are load or store.
  // If there is anything else, bail out.
  for (User *U : GV->users()) {
    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
      return false;
    if (getLoadStoreType(U) != GVElType)
      return false;
  }

  LLVM_DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV << "\n");

  // Create the new global, initializing it to false.
  GlobalVariable *NewGV = new GlobalVariable(
      Type::getInt1Ty(GV->getContext()),
      false,
      GlobalValue::InternalLinkage,
      ConstantInt::getFalse(GV->getContext()),
      GV->getName() + ".b",
      GV->getThreadLocalMode(),
      GV->getType()->getAddressSpace());
  NewGV->copyAttributesFrom(GV);
  GV->getParent()->insertGlobalVariable(GV->getIterator(), NewGV);

  Constant *InitVal = GV->getInitializer();
  assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
         "No reason to shrink to bool!");

  SmallVector<DIGlobalVariableExpression *, 1> GVs;
  GV->getDebugInfo(GVs);

  // If initialized to zero and storing one into the global, we can use a cast
  // instead of a select to synthesize the desired value.
  bool IsOneZero = false;
  bool EmitOneOrZero = true;
  auto *CI = dyn_cast<ConstantInt>(OtherVal);
  if (CI && CI->getValue().getActiveBits() <= 64) {
    IsOneZero = InitVal->isNullValue() && CI->isOne();

    auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer());
    if (CIInit && CIInit->getValue().getActiveBits() <= 64) {
      uint64_t ValInit = CIInit->getZExtValue();
      uint64_t ValOther = CI->getZExtValue();
      uint64_t ValMinus = ValOther - ValInit;

      for (auto *GVe : GVs) {
        DIGlobalVariable *DGV = GVe->getVariable();
        DIExpression *E = GVe->getExpression();
        const DataLayout &DL = GV->getDataLayout();
        unsigned SizeInOctets =
            DL.getTypeAllocSizeInBits(NewGV->getValueType()) / 8;

        // It is expected that the address of the global optimized variable is
        // on top of the stack. After optimization, the value of that variable
        // will be either 0 for the initial value or 1 for the other value.
        // The following expression should return a constant integer value
        // depending on the value at the global object's address:
        //   val * (ValOther - ValInit) + ValInit:
        //   DW_OP_deref DW_OP_constu <ValMinus>
        //   DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
        SmallVector<uint64_t, 12> Ops = {
            dwarf::DW_OP_deref_size, SizeInOctets,
            dwarf::DW_OP_constu, ValMinus,
            dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
            dwarf::DW_OP_plus};
        bool WithStackValue = true;
        E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
        DIGlobalVariableExpression *DGVE =
            DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
        NewGV->addDebugInfo(DGVE);
      }
      EmitOneOrZero = false;
    }
  }

  if (EmitOneOrZero) {
    // FIXME: This will only emit address for debugger on which will
    // be written only 0 or 1.
    for (auto *GV : GVs)
      NewGV->addDebugInfo(GV);
  }

  while (!GV->use_empty()) {
    Instruction *UI = cast<Instruction>(GV->user_back());
    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
      // Change the store into a boolean store.
      bool StoringOther = SI->getOperand(0) == OtherVal;
      // Only do this if we weren't storing a loaded value.
      Value *StoreVal;
      if (StoringOther || SI->getOperand(0) == InitVal) {
        StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
                                    StoringOther);
      } else {
        // Otherwise, we are storing a previously loaded copy. To do this,
        // change the copy from copying the original value to just copying the
        // bool.
        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));

        // If we've already replaced the input, StoredVal will be a cast or
        // select instruction. If not, it will be a load of the original
        // global.
        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
          assert(LI->getOperand(0) == GV && "Not a copy!");
          // Insert a new load, to preserve the saved value.
          StoreVal =
              new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b",
                           false, Align(1), LI->getOrdering(),
                           LI->getSyncScopeID(), LI->getIterator());
          cast<LoadInst>(StoreVal)->setDebugLoc(LI->getDebugLoc());
        } else {
          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
                 "This is not a form that we understand!");
          StoreVal = StoredVal->getOperand(0);
          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
        }
      }
      StoreInst *NSI =
          new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(),
                        SI->getSyncScopeID(), SI->getIterator());
      NSI->setDebugLoc(SI->getDebugLoc());
    } else {
      // Change the load into a load of bool then a select.
      LoadInst *LI = cast<LoadInst>(UI);
      LoadInst *NLI = new LoadInst(
          NewGV->getValueType(), NewGV, LI->getName() + ".b", false, Align(1),
          LI->getOrdering(), LI->getSyncScopeID(), LI->getIterator());
      Instruction *NSI;
      if (IsOneZero)
        NSI = new ZExtInst(NLI, LI->getType(), "", LI->getIterator());
      else
        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI->getIterator());
      NSI->takeName(LI);
      // Since LI is split into two instructions, NLI and NSI both inherit the
      // same DebugLoc
      NLI->setDebugLoc(LI->getDebugLoc());
      NSI->setDebugLoc(LI->getDebugLoc());
      LI->replaceAllUsesWith(NSI);
    }
    UI->eraseFromParent();
  }

  // Retain the name of the old global variable. People who are debugging their
  // programs may expect these variables to be named the same.
  NewGV->takeName(GV);
  GV->eraseFromParent();
  return true;
}

static bool
deleteIfDead(GlobalValue &GV,
             SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
             function_ref<void(Function &)> DeleteFnCallback = nullptr) {
  GV.removeDeadConstantUsers();

  if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
    return false;

  if (const Comdat *C = GV.getComdat())
    if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
      return false;

  bool Dead;
  if (auto *F = dyn_cast<Function>(&GV))
    Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
  else
    Dead = GV.use_empty();
  if (!Dead)
    return false;

  LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
  if (auto *F = dyn_cast<Function>(&GV)) {
    if (DeleteFnCallback)
      DeleteFnCallback(*F);
  }
  GV.eraseFromParent();
  ++NumDeleted;
  return true;
}

static bool isPointerValueDeadOnEntryToFunction(
    const Function *F, GlobalValue *GV,
    function_ref<DominatorTree &(Function &)> LookupDomTree) {
  // Find all uses of GV. We expect them all to be in F, and if we can't
  // identify any of the uses we bail out.
  //
  // On each of these uses, identify if the memory that GV points to is
  // used/required/live at the start of the function. If it is not, for example
  // if the first thing the function does is store to the GV, the GV can
  // possibly be demoted.
  //
  // We don't do an exhaustive search for memory operations - simply look
  // through bitcasts as they're quite common and benign.
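  //
  // For illustration (hypothetical IR): in
  //   define void @f() {
  //     store i32 0, ptr @g     ; dominates the load below
  //     %v = load i32, ptr @g
  //     ret void
  //   }
  // no use observes @g's value from before entry, so @g can be demoted to an
  // alloca in @f.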
  const DataLayout &DL = GV->getDataLayout();
  SmallVector<LoadInst *, 4> Loads;
  SmallVector<StoreInst *, 4> Stores;
  for (auto *U : GV->users()) {
    Instruction *I = dyn_cast<Instruction>(U);
    if (!I)
      return false;
    assert(I->getParent()->getParent() == F);

    if (auto *LI = dyn_cast<LoadInst>(I))
      Loads.push_back(LI);
    else if (auto *SI = dyn_cast<StoreInst>(I))
      Stores.push_back(SI);
    else
      return false;
  }

  // We have identified all uses of GV into loads and stores. Now check if all
  // of them are known not to depend on the value of the global at the function
  // entry point. We do this by ensuring that every load is dominated by at
  // least one store.
  auto &DT = LookupDomTree(*const_cast<Function *>(F));

  // The below check is quadratic. Check we're not going to do too many tests.
  // FIXME: Even though this will always have worst-case quadratic time, we
  // could put effort into minimizing the average time by putting stores that
  // have been shown to dominate at least one load at the beginning of the
  // Stores array, making subsequent dominance checks more likely to succeed
  // early.
  //
  // The threshold here is fairly large because global->local demotion is a
  // very powerful optimization should it fire.
  const unsigned Threshold = 100;
  if (Loads.size() * Stores.size() > Threshold)
    return false;

  for (auto *L : Loads) {
    auto *LTy = L->getType();
    if (none_of(Stores, [&](const StoreInst *S) {
          auto *STy = S->getValueOperand()->getType();
          // The load is only dominated by the store if DomTree says so
          // and the number of bits loaded in L is less than or equal to
          // the number of bits stored in S.
          return DT.dominates(S, L) &&
                 DL.getTypeStoreSize(LTy).getFixedValue() <=
                     DL.getTypeStoreSize(STy).getFixedValue();
        }))
      return false;
  }
  // All loads have known dependences inside F, so the global can be localized.
  return true;
}

// For a global variable with one store, if the store dominates any loads,
// those loads will always load the stored value (as opposed to the
// initializer), even in the presence of recursion.
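// For illustration (hypothetical IR): if the unique store "store i32 5, ptr
// @g" dominates "%v = load i32, ptr @g", the load can be replaced with 5.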
static bool forwardStoredOnceStore(
    GlobalVariable *GV, const StoreInst *StoredOnceStore,
    function_ref<DominatorTree &(Function &)> LookupDomTree) {
  const Value *StoredOnceValue = StoredOnceStore->getValueOperand();
  // We can do this optimization for non-constants in nosync + norecurse
  // functions, but globals used in exactly one norecurse function are already
  // promoted to an alloca.
  if (!isa<Constant>(StoredOnceValue))
    return false;
  const Function *F = StoredOnceStore->getFunction();
  SmallVector<LoadInst *> Loads;
  for (User *U : GV->users()) {
    if (auto *LI = dyn_cast<LoadInst>(U)) {
      if (LI->getFunction() == F &&
          LI->getType() == StoredOnceValue->getType() && LI->isSimple())
        Loads.push_back(LI);
    }
  }
  // Only compute DT if we have any loads to examine.
  bool MadeChange = false;
  if (!Loads.empty()) {
    auto &DT = LookupDomTree(*const_cast<Function *>(F));
    for (auto *LI : Loads) {
      if (DT.dominates(StoredOnceStore, LI)) {
        LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue));
        LI->eraseFromParent();
        MadeChange = true;
      }
    }
  }
  return MadeChange;
}

/// Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
static bool
processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
                      function_ref<TargetTransformInfo &(Function &)> GetTTI,
                      function_ref<TargetLibraryInfo &(Function &)> GetTLI,
                      function_ref<DominatorTree &(Function &)> LookupDomTree) {
  auto &DL = GV->getDataLayout();
  // If this is a first class global and has only one accessing function and
  // this function is non-recursive, we replace the global with a local alloca
  // in this function.
  //
  // NOTE: It doesn't make sense to promote non-single-value types since we
  // are just replacing static memory to stack memory.
  //
  // If the global is in different address space, don't bring it to stack.
  if (!GS.HasMultipleAccessingFunctions &&
      GS.AccessingFunction &&
      GV->getValueType()->isSingleValueType() &&
      GV->getType()->getAddressSpace() == DL.getAllocaAddrSpace() &&
      !GV->isExternallyInitialized() &&
      GS.AccessingFunction->doesNotRecurse() &&
      isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
                                          LookupDomTree)) {
    const DataLayout &DL = GV->getDataLayout();

    LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
    BasicBlock::iterator FirstI =
        GS.AccessingFunction->getEntryBlock().begin().getNonConst();
    Type *ElemTy = GV->getValueType();
    // FIXME: Pass Global's alignment when globals have alignment
    AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(),
                                        nullptr, GV->getName(), FirstI);
    if (!isa<UndefValue>(GV->getInitializer())) {
      auto *SI = new StoreInst(GV->getInitializer(), Alloca, FirstI);
      // FIXME: We're localizing a global and creating a store instruction for
      // the initial value of that global. Could we logically use the global
      // variable's (if one exists) line for this?
      SI->setDebugLoc(DebugLoc::getCompilerGenerated());
    }

    GV->replaceAllUsesWith(Alloca);
    GV->eraseFromParent();
    ++NumLocalized;
    return true;
  }

  bool Changed = false;

  // If the global is never loaded (but may be stored to), it is dead.
  // Delete it now.
  if (!GS.IsLoaded) {
    LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");

    if (isLeakCheckerRoot(GV)) {
      // Delete any constant stores to the global.
      Changed = CleanupPointerRootUsers(GV, GetTLI);
    } else {
      // Delete any stores we can find to the global. We may not be able to
      // make it completely dead though.
      Changed = CleanupConstantGlobalUsers(GV, DL);
    }

    // If the global is dead now, delete it.
    if (GV->use_empty()) {
      GV->eraseFromParent();
      ++NumDeleted;
      Changed = true;
    }
    return Changed;
  }

  if (GS.StoredType <= GlobalStatus::InitializerStored) {
    LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");

    // Don't actually mark a global constant if it's atomic because atomic loads
    // are implemented by a trivial cmpxchg in some edge-cases and that usually
    // requires write access to the variable even if it's not actually changed.
    if (GS.Ordering == AtomicOrdering::NotAtomic) {
      assert(!GV->isConstant() && "Expected a non-constant global");
      GV->setConstant(true);
      Changed = true;
    }

    // Clean up any obviously simplifiable users now.
    Changed |= CleanupConstantGlobalUsers(GV, DL);

    // If the global is dead now, just nuke it.
    if (GV->use_empty()) {
      LLVM_DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
                        << "all users and delete global!\n");
      GV->eraseFromParent();
      ++NumDeleted;
      return true;
    }

    // Fall through to the next check; see if we can optimize further.
    ++NumMarked;
  }
  if (!GV->getInitializer()->getType()->isSingleValueType()) {
    const DataLayout &DL = GV->getDataLayout();
    if (SRAGlobal(GV, DL))
      return true;
  }
  Value *StoredOnceValue = GS.getStoredOnceValue();
  if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
    Function &StoreFn =
        const_cast<Function &>(*GS.StoredOnceStore->getFunction());
    bool CanHaveNonUndefGlobalInitializer =
        GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
            GV->getType()->getAddressSpace());
    // If the initial value for the global was an undef value, and if only
    // one other value was stored into it, we can just change the
    // initializer to be the stored value, then delete all stores to the
    // global. This allows us to mark it constant.
    // This is restricted to address spaces that allow globals to have
    // initializers. NVPTX, for example, does not support initializers for
    // shared memory (AS 3).
    auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
    if (SOVConstant && isa<UndefValue>(GV->getInitializer()) &&
        DL.getTypeAllocSize(SOVConstant->getType()) ==
            DL.getTypeAllocSize(GV->getValueType()) &&
        CanHaveNonUndefGlobalInitializer) {
      if (SOVConstant->getType() == GV->getValueType()) {
        // Change the initializer in place.
        GV->setInitializer(SOVConstant);
      } else {
        // Create a new global with adjusted type.
        auto *NGV = new GlobalVariable(
            *GV->getParent(), SOVConstant->getType(), GV->isConstant(),
            GV->getLinkage(), SOVConstant, "", GV, GV->getThreadLocalMode(),
            GV->getAddressSpace());
        NGV->takeName(GV);
        NGV->copyAttributesFrom(GV);
        GV->replaceAllUsesWith(NGV);
        GV->eraseFromParent();
        GV = NGV;
      }

      // Clean up any obviously simplifiable users now.
      CleanupConstantGlobalUsers(GV, DL);

      if (GV->use_empty()) {
        LLVM_DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
                          << "simplify all users and delete global!\n");
        GV->eraseFromParent();
        ++NumDeleted;
      }
      ++NumSubstitute;
      return true;
    }

    // Try to optimize globals based on the knowledge that only one value
    // (besides its initializer) is ever stored to the global.
    if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI))
      return true;

    // Try to forward the store to any loads. If we have more than one store, we
    // may have a store of the initializer between StoredOnceStore and a load.
    if (GS.NumStores == 1)
      if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree))
        return true;

    // Otherwise, if the global was not a boolean, we can shrink it to be a
    // boolean. Skip this optimization for AS that doesn't allow an initializer.
    if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
        (!isa<UndefValue>(GV->getInitializer()) ||
         CanHaveNonUndefGlobalInitializer)) {
      if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
        ++NumShrunkToBool;
        return true;
      }
    }
  }

  return Changed;
}

/// Analyze the specified global variable and optimize it if possible. If we
/// make a change, return true.
static bool
processGlobal(GlobalValue &GV,
              function_ref<TargetTransformInfo &(Function &)> GetTTI,
              function_ref<TargetLibraryInfo &(Function &)> GetTLI,
              function_ref<DominatorTree &(Function &)> LookupDomTree) {
  if (GV.getName().starts_with("llvm."))
    return false;

  GlobalStatus GS;

  if (GlobalStatus::analyzeGlobal(&GV, GS))
    return false;

  bool Changed = false;
  if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
    auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
                                               : GlobalValue::UnnamedAddr::Local;
    if (NewUnnamedAddr != GV.getUnnamedAddr()) {
      GV.setUnnamedAddr(NewUnnamedAddr);
      NumUnnamed++;
      Changed = true;
    }
  }

  // Do more involved optimizations if the global is internal.
  if (!GV.hasLocalLinkage())
    return Changed;

  auto *GVar = dyn_cast<GlobalVariable>(&GV);
  if (!GVar)
    return Changed;

  if (GVar->isConstant() || !GVar->hasInitializer())
    return Changed;

  return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) ||
         Changed;
}

1684/// Walk all of the direct calls of the specified function, changing them to
1685/// FastCC.
1686 static void ChangeCalleesToFastCall(Function *F) {
1687 for (User *U : F->users())
1688 if (auto *Call = dyn_cast<CallBase>(U))
1689 if (Call->getCalledOperand() == F)
1690 Call->setCallingConv(CallingConv::Fast);
1691}
1692
1693static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
1694 Attribute::AttrKind A) {
1695 unsigned AttrIndex;
1696 if (Attrs.hasAttrSomewhere(A, &AttrIndex))
1697 return Attrs.removeAttributeAtIndex(C, AttrIndex, A);
1698 return Attrs;
1699}
1700
1701 static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
1702 F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
1703 for (User *U : F->users()) {
1704 CallBase *CB = cast<CallBase>(U);
1705 CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A));
1706 }
1707}
1708
1709/// Return true if this is a calling convention that we'd like to change. The
1710/// idea here is that we don't want to mess with the convention if the user
1711/// explicitly requested something with performance implications like coldcc,
1712/// GHC, or anyregcc.
1713 static bool hasChangeableCCImpl(Function *F) {
1714 CallingConv::ID CC = F->getCallingConv();
1715
1716 // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
1717 if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
1718 return false;
1719
1720 if (F->isVarArg())
1721 return false;
1722
1723 // FIXME: Change CC for the whole chain of musttail calls when possible.
1724 //
1725 // Can't change CC of the function that either has musttail calls, or is a
1726 // musttail callee itself
1727 for (User *U : F->users()) {
1728 CallInst *CI = dyn_cast<CallInst>(U);
1729 if (!CI)
1730 continue;
1731
1732 if (CI->isMustTailCall())
1733 return false;
1734 }
1735
1736 for (BasicBlock &BB : *F)
1737 if (BB.getTerminatingMustTailCall())
1738 return false;
1739
1740 return !F->hasAddressTaken();
1741}
1742
1743 using ChangeableCCCacheTy = SmallDenseMap<Function *, bool, 8>;
1744 static bool hasChangeableCC(Function *F,
1745 ChangeableCCCacheTy &ChangeableCCCache) {
1746 auto Res = ChangeableCCCache.try_emplace(F, false);
1747 if (Res.second)
1748 Res.first->second = hasChangeableCCImpl(F);
1749 return Res.first->second;
1750}
1751
1752/// Return true if the block containing the call site has a BlockFrequency of
1753/// less than ColdCCRelFreq% of the entry block.
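// For example, with the default ColdCCRelFreq of 2 this returns true when the
// call site's block frequency is below 2% of the caller's entry frequency,
// i.e. CallSiteFreq < CallerEntryFreq * BranchProbability(2, 100).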
1754static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) {
1755 const BranchProbability ColdProb(ColdCCRelFreq, 100);
1756 auto *CallSiteBB = CB.getParent();
1757 auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
1758 auto CallerEntryFreq =
1759 CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock()));
1760 return CallSiteFreq < CallerEntryFreq * ColdProb;
1761}
1762
1763 // This function checks if the input function F is cold at all call sites. It
1764 // also looks at each call site's containing function, returning false if the
1765 // caller contains other non-cold calls. The input vector AllCallsCold
1766 // contains the functions that only have call sites in cold blocks.
1767static bool
1768 isValidCandidateForColdCC(Function &F,
1769 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1770 const std::vector<Function *> &AllCallsCold) {
1771
1772 if (F.user_empty())
1773 return false;
1774
1775 for (User *U : F.users()) {
1776 CallBase *CB = dyn_cast<CallBase>(U);
1777 if (!CB || CB->getCalledOperand() != &F)
1778 continue;
1779 Function *CallerFunc = CB->getParent()->getParent();
1780 BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
1781 if (!isColdCallSite(*CB, CallerBFI))
1782 return false;
1783 if (!llvm::is_contained(AllCallsCold, CallerFunc))
1784 return false;
1785 }
1786 return true;
1787}
1788
1789 static void changeCallSitesToColdCC(Function *F) {
1790 for (User *U : F->users())
1791 if (auto *Call = dyn_cast<CallBase>(U))
1792 if (Call->getCalledOperand() == F)
1793 Call->setCallingConv(CallingConv::Cold);
1794}
1795
1796// This function iterates over all the call instructions in the input Function
1797// and checks that all call sites are in cold blocks and are allowed to use the
1798// coldcc calling convention.
1799static bool
1800 hasOnlyColdCalls(Function &F,
1801 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1802 ChangeableCCCacheTy &ChangeableCCCache) {
1803 for (BasicBlock &BB : F) {
1804 for (Instruction &I : BB) {
1805 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1806 // Skip over inline asm instructions since they aren't function calls.
1807 if (CI->isInlineAsm())
1808 continue;
1809 Function *CalledFn = CI->getCalledFunction();
1810 if (!CalledFn)
1811 return false;
1812 // Skip over intrinsics since they won't remain as function calls.
1813 // Important to do this check before the linkage check below so we
1814 // won't bail out on debug intrinsics, possibly making the generated
1815 // code dependent on the presence of debug info.
1816 if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
1817 continue;
1818 if (!CalledFn->hasLocalLinkage())
1819 return false;
1820 // Check if it's valid to use coldcc calling convention.
1821 if (!hasChangeableCC(CalledFn, ChangeableCCCache))
1822 return false;
1823 BlockFrequencyInfo &CallerBFI = GetBFI(F);
1824 if (!isColdCallSite(*CI, CallerBFI))
1825 return false;
1826 }
1827 }
1828 }
1829 return true;
1830}
1831
1832 static bool hasMustTailCallers(Function *F) {
1833 for (User *U : F->users()) {
1834 CallBase *CB = cast<CallBase>(U);
1835 if (CB->isMustTailCall())
1836 return true;
1837 }
1838 return false;
1839}
1840
1841 static bool hasInvokeCallers(Function *F) {
1842 for (User *U : F->users())
1843 if (isa<InvokeInst>(U))
1844 return true;
1845 return false;
1846}
1847
1848 static void RemovePreallocated(Function *F) {
1849 RemoveAttribute(F, Attribute::Preallocated);
1850
1851 auto *M = F->getParent();
1852
1853 IRBuilder<> Builder(M->getContext());
1854
1855 // Cannot modify users() while iterating over it, so make a copy.
1856 SmallVector<User *, 4> PreallocatedCalls(F->users());
1857 for (User *U : PreallocatedCalls) {
1858 CallBase *CB = dyn_cast<CallBase>(U);
1859 if (!CB)
1860 continue;
1861
1862 assert(
1863 !CB->isMustTailCall() &&
1864 "Shouldn't call RemotePreallocated() on a musttail preallocated call");
1865 // Create copy of call without "preallocated" operand bundle.
1866 SmallVector<OperandBundleDef, 1> OpBundles;
1867 CB->getOperandBundlesAsDefs(OpBundles);
1868 CallBase *PreallocatedSetup = nullptr;
1869 for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
1870 if (It->getTag() == "preallocated") {
1871 PreallocatedSetup = cast<CallBase>(*It->input_begin());
1872 OpBundles.erase(It);
1873 break;
1874 }
1875 }
1876 assert(PreallocatedSetup && "Did not find preallocated bundle");
1877 uint64_t ArgCount =
1878 cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue();
1879
1880 assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&
1881 "Unknown indirect call type");
1882 CallBase *NewCB = CallBase::Create(CB, OpBundles, CB->getIterator());
1883 CB->replaceAllUsesWith(NewCB);
1884 NewCB->takeName(CB);
1885 CB->eraseFromParent();
1886
1887 Builder.SetInsertPoint(PreallocatedSetup);
1888 auto *StackSave = Builder.CreateStackSave();
1889 Builder.SetInsertPoint(NewCB->getNextNode());
1890 Builder.CreateStackRestore(StackSave);
1891
1892 // Replace @llvm.call.preallocated.arg() with alloca.
1893 // Cannot modify users() while iterating over it, so make a copy.
1894 // @llvm.call.preallocated.arg() can be called with the same index multiple
1895 // times. So for each @llvm.call.preallocated.arg(), we see if we have
1896 // already created a Value* for the index, and if not, create an alloca and
1897 // bitcast right after the @llvm.call.preallocated.setup() so that it
1898 // dominates all uses.
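// Sketch of the overall rewrite (hypothetical IR, names illustrative):
//   %t = call token @llvm.call.preallocated.setup(i32 1)
//   %a = call ptr @llvm.call.preallocated.arg(token %t, i32 0) preallocated(i32)
//   call void @f(ptr preallocated(i32) %a) ["preallocated"(token %t)]
// is lowered to a plain alloca plus a direct call, bracketed by
// stacksave/stackrestore:
//   %ss = call ptr @llvm.stacksave()
//   %a = alloca i32
//   call void @f(ptr preallocated(i32) %a)
//   call void @llvm.stackrestore(ptr %ss)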
1899 SmallVector<Value *, 2> ArgAllocas(ArgCount);
1900 SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users());
1901 for (auto *User : PreallocatedArgs) {
1902 auto *UseCall = cast<CallBase>(User);
1903 assert(UseCall->getCalledFunction()->getIntrinsicID() ==
1904 Intrinsic::call_preallocated_arg &&
1905 "preallocated token use was not a llvm.call.preallocated.arg");
1906 uint64_t AllocArgIndex =
1907 cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue();
1908 Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
1909 if (!AllocaReplacement) {
1910 auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
1911 auto *ArgType =
1912 UseCall->getFnAttr(Attribute::Preallocated).getValueAsType();
1913 auto *InsertBefore = PreallocatedSetup->getNextNode();
1914 Builder.SetInsertPoint(InsertBefore);
1915 auto *Alloca =
1916 Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg");
1917 ArgAllocas[AllocArgIndex] = Alloca;
1918 AllocaReplacement = Alloca;
1919 }
1920
1921 UseCall->replaceAllUsesWith(AllocaReplacement);
1922 UseCall->eraseFromParent();
1923 }
1924 // Remove @llvm.call.preallocated.setup().
1925 cast<Instruction>(PreallocatedSetup)->eraseFromParent();
1926 }
1927}
1928
1929static bool
1930 OptimizeFunctions(Module &M,
1931 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1932 function_ref<TargetTransformInfo &(Function &)> GetTTI,
1933 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1934 function_ref<DominatorTree &(Function &)> LookupDomTree,
1935 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
1936 function_ref<void(Function &F)> ChangedCFGCallback,
1937 function_ref<void(Function &F)> DeleteFnCallback) {
1938
1939 bool Changed = false;
1940
1941 ChangeableCCCacheTy ChangeableCCCache;
1942 std::vector<Function *> AllCallsCold;
1943 for (Function &F : M)
1944 if (hasOnlyColdCalls(F, GetBFI, ChangeableCCCache))
1945 AllCallsCold.push_back(&F);
1946
1947 // Optimize functions.
1948 for (Function &F : llvm::make_early_inc_range(M)) {
1949 // Don't perform global opt pass on naked functions; we don't want fast
1950 // calling conventions for naked functions.
1951 if (F.hasFnAttribute(Attribute::Naked))
1952 continue;
1953
1954 // Functions without names cannot be referenced outside this module.
1955 if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage())
1956 F.setLinkage(GlobalValue::InternalLinkage);
1957
1958 if (deleteIfDead(F, NotDiscardableComdats, DeleteFnCallback)) {
1959 Changed = true;
1960 continue;
1961 }
1962
1963 // LLVM's definition of dominance allows instructions that are cyclic
1964 // in unreachable blocks, e.g.:
1965 // %pat = select i1 %condition, @global, i16* %pat
1966 // because any instruction dominates an instruction in a block that's
1967 // not reachable from entry.
1968 // So, remove unreachable blocks from the function, because a) there's
1969 // no point in analyzing them and b) GlobalOpt should otherwise grow
1970 // some more complicated logic to break these cycles.
1971 // Notify the analysis manager that we've modified the function's CFG.
1972 if (!F.isDeclaration()) {
1973 if (removeUnreachableBlocks(F)) {
1974 Changed = true;
1975 ChangedCFGCallback(F);
1976 }
1977 }
1978
1979 Changed |= processGlobal(F, GetTTI, GetTLI, LookupDomTree);
1980
1981 if (!F.hasLocalLinkage())
1982 continue;
1983
1984 // If we have an inalloca parameter that we can safely remove the
1985 // inalloca attribute from, do so. This unlocks optimizations that
1986 // wouldn't be safe in the presence of inalloca.
1987 // FIXME: We should also hoist alloca affected by this to the entry
1988 // block if possible.
1989 if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
1990 !F.hasAddressTaken() && !hasMustTailCallers(&F) && !F.isVarArg()) {
1991 RemoveAttribute(&F, Attribute::InAlloca);
1992 Changed = true;
1993 }
1994
1995 // FIXME: handle invokes
1996 // FIXME: handle musttail
1997 if (F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
1998 if (!F.hasAddressTaken() && !hasMustTailCallers(&F) &&
1999 !hasInvokeCallers(&F)) {
2000 RemovePreallocated(&F);
2001 Changed = true;
2002 }
2003 continue;
2004 }
2005
2006 if (hasChangeableCC(&F, ChangeableCCCache)) {
2007 NumInternalFunc++;
2008 TargetTransformInfo &TTI = GetTTI(F);
2009 // Change the calling convention to coldcc if either stress testing is
2010 // enabled or the target would like to use coldcc on functions which are
2011 // cold at all call sites and the callers contain no other non coldcc
2012 // calls.
2013 if (EnableColdCCStressTest ||
2014 (TTI.useColdCCForColdCall(F) &&
2015 isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) {
2016 ChangeableCCCache.erase(&F);
2017 F.setCallingConv(CallingConv::Cold);
2018 changeCallSitesToColdCC(&F);
2019 Changed = true;
2020 NumColdCC++;
2021 }
2022 }
2023
2024 if (hasChangeableCC(&F, ChangeableCCCache)) {
2025 // If this function has a calling convention worth changing, is not a
2026 // varargs function, is only called directly, and is supported by the
2027 // target, promote it to use the Fast calling convention.
2028 TargetTransformInfo &TTI = GetTTI(F);
2029 if (TTI.useFastCCForInternalCall(F)) {
2030 F.setCallingConv(CallingConv::Fast);
2031 ChangeCalleesToFastCall(&F);
2032 ++NumFastCallFns;
2033 Changed = true;
2034 }
2035 }
2036
2037 if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
2038 !F.hasAddressTaken()) {
2039 // The function is not used by a trampoline intrinsic, so it is safe
2040 // to remove the 'nest' attribute.
2041 RemoveAttribute(&F, Attribute::Nest);
2042 ++NumNestRemoved;
2043 Changed = true;
2044 }
2045 }
2046 return Changed;
2047}
2048
2049static bool
2050 OptimizeGlobalVars(Module &M,
2051 function_ref<TargetTransformInfo &(Function &)> GetTTI,
2052 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2053 function_ref<DominatorTree &(Function &)> LookupDomTree,
2054 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2055 bool Changed = false;
2056
2057 for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
2058 // Global variables without names cannot be referenced outside this module.
2059 if (!GV.hasName() && !GV.isDeclaration() && !GV.hasLocalLinkage())
2060 GV.setLinkage(GlobalValue::InternalLinkage);
2061 // Simplify the initializer.
2062 if (GV.hasInitializer())
2063 if (auto *C = dyn_cast<Constant>(GV.getInitializer())) {
2064 auto &DL = M.getDataLayout();
2065 // TLI is not used in the case of a Constant, so use default nullptr
2066 // for that optional parameter, since we don't have a Function to
2067 // provide GetTLI anyway.
2068 Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
2069 if (New != C)
2070 GV.setInitializer(New);
2071 }
2072
2073 if (deleteIfDead(GV, NotDiscardableComdats)) {
2074 Changed = true;
2075 continue;
2076 }
2077
2078 Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
2079 }
2080 return Changed;
2081}
2082
2083/// Evaluate static constructors in the function, if we can. Return true if we
2084/// can, false otherwise.
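// For instance (illustrative, not from this file), a ctor whose body reduces
// to
//   store i32 42, ptr @g
//   ret void
// can be fully evaluated: the store is committed as @g's initializer and the
// entry is then dropped from llvm.global_ctors by optimizeGlobalCtorsList.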
2085 static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
2086 TargetLibraryInfo *TLI) {
2087 // Skip external functions.
2088 if (F->isDeclaration())
2089 return false;
2090 // Call the function.
2091 Evaluator Eval(DL, TLI);
2092 Constant *RetValDummy;
2093 bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
2094 SmallVector<Constant*, 0>());
2095
2096 if (EvalSuccess) {
2097 ++NumCtorsEvaluated;
2098
2099 // We succeeded at evaluation: commit the result.
2100 auto NewInitializers = Eval.getMutatedInitializers();
2101 LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
2102 << F->getName() << "' to " << NewInitializers.size()
2103 << " stores.\n");
2104 for (const auto &Pair : NewInitializers)
2105 Pair.first->setInitializer(Pair.second);
2106 for (GlobalVariable *GV : Eval.getInvariants())
2107 GV->setConstant(true);
2108 }
2109
2110 return EvalSuccess;
2111}
2112
2113static int compareNames(Constant *const *A, Constant *const *B) {
2114 Value *AStripped = (*A)->stripPointerCasts();
2115 Value *BStripped = (*B)->stripPointerCasts();
2116 return AStripped->getName().compare(BStripped->getName());
2117}
2118
2119 static void setUsedInitializer(GlobalVariable &V,
2120 const SmallPtrSetImpl<GlobalValue *> &Init) {
2121 if (Init.empty()) {
2122 V.eraseFromParent();
2123 return;
2124 }
2125
2126 // Get address space of pointers in the array of pointers.
2127 const Type *UsedArrayType = V.getValueType();
2128 const auto *VAT = cast<ArrayType>(UsedArrayType);
2129 const auto *VEPT = cast<PointerType>(VAT->getArrayElementType());
2130
2131 // Type of pointer to the array of pointers.
2132 PointerType *PtrTy =
2133 PointerType::get(V.getContext(), VEPT->getAddressSpace());
2134
2136 for (GlobalValue *GV : Init) {
2138 UsedArray.push_back(Cast);
2139 }
2140
2141 // Sort to get deterministic order.
2142 array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
2143 ArrayType *ATy = ArrayType::get(PtrTy, UsedArray.size());
2144
2145 Module *M = V.getParent();
2146 V.removeFromParent();
2147 GlobalVariable *NV =
2149 ConstantArray::get(ATy, UsedArray), "");
2150 NV->takeName(&V);
2151 NV->setSection("llvm.metadata");
2152 delete &V;
2153}
2154
2155namespace {
2156
2157/// An easy to access representation of llvm.used and llvm.compiler.used.
2158class LLVMUsed {
2159 SmallPtrSet<GlobalValue *, 4> Used;
2160 SmallPtrSet<GlobalValue *, 4> CompilerUsed;
2161 GlobalVariable *UsedV;
2162 GlobalVariable *CompilerUsedV;
2163
2164public:
2165 LLVMUsed(Module &M) {
2166 SmallVector<GlobalValue *, 4> Vec;
2167 UsedV = collectUsedGlobalVariables(M, Vec, false);
2168 Used = {llvm::from_range, Vec};
2169 Vec.clear();
2170 CompilerUsedV = collectUsedGlobalVariables(M, Vec, true);
2171 CompilerUsed = {llvm::from_range, Vec};
2172 }
2173
2174 using iterator = SmallPtrSet<GlobalValue *, 4>::iterator;
2175 using used_iterator_range = iterator_range<iterator>;
2176
2177 iterator usedBegin() { return Used.begin(); }
2178 iterator usedEnd() { return Used.end(); }
2179
2180 used_iterator_range used() {
2181 return used_iterator_range(usedBegin(), usedEnd());
2182 }
2183
2184 iterator compilerUsedBegin() { return CompilerUsed.begin(); }
2185 iterator compilerUsedEnd() { return CompilerUsed.end(); }
2186
2187 used_iterator_range compilerUsed() {
2188 return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
2189 }
2190
2191 bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
2192
2193 bool compilerUsedCount(GlobalValue *GV) const {
2194 return CompilerUsed.count(GV);
2195 }
2196
2197 bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
2198 bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
2199 bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
2200
2201 bool compilerUsedInsert(GlobalValue *GV) {
2202 return CompilerUsed.insert(GV).second;
2203 }
2204
2205 void syncVariablesAndSets() {
2206 if (UsedV)
2207 setUsedInitializer(*UsedV, Used);
2208 if (CompilerUsedV)
2209 setUsedInitializer(*CompilerUsedV, CompilerUsed);
2210 }
2211};
2212
2213} // end anonymous namespace
2214
2215static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
2216 if (GA.use_empty()) // No use at all.
2217 return false;
2218
2219 assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
2220 "We should have removed the duplicated "
2221 "element from llvm.compiler.used");
2222 if (!GA.hasOneUse())
2223 // Strictly more than one use. So at least one is not in llvm.used and
2224 // llvm.compiler.used.
2225 return true;
2226
2227 // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
2228 return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
2229}
2230
2231static bool mayHaveOtherReferences(GlobalValue &GV, const LLVMUsed &U) {
2232 if (!GV.hasLocalLinkage())
2233 return true;
2234
2235 return U.usedCount(&GV) || U.compilerUsedCount(&GV);
2236}
2237
2238static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
2239 bool &RenameTarget) {
2240 if (GA.isWeakForLinker())
2241 return false;
2242
2243 RenameTarget = false;
2244 bool Ret = false;
2245 if (hasUseOtherThanLLVMUsed(GA, U))
2246 Ret = true;
2247
2248 // If the alias is externally visible, we may still be able to simplify it.
2249 if (!mayHaveOtherReferences(GA, U))
2250 return Ret;
2251
2252 // If the aliasee has internal linkage and no other references (e.g.,
2253 // @llvm.used, @llvm.compiler.used), give it the name and linkage of the
2254 // alias, and delete the alias. This turns:
2255 // define internal ... @f(...)
2256 // @a = alias ... @f
2257 // into:
2258 // define ... @a(...)
2259 Constant *Aliasee = GA.getAliasee();
2260 GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
2261 if (mayHaveOtherReferences(*Target, U))
2262 return Ret;
2263
2264 RenameTarget = true;
2265 return true;
2266}
2267
2268static bool
2269 OptimizeGlobalAliases(Module &M,
2270 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2271 bool Changed = false;
2272 LLVMUsed Used(M);
2273
2274 for (GlobalValue *GV : Used.used())
2275 Used.compilerUsedErase(GV);
2276
2277 // Return whether GV is explicitly or implicitly dso_local and not replaceable
2278 // by another definition in the current linkage unit.
2279 auto IsModuleLocal = [](GlobalValue &GV) {
2280 return !GlobalValue::isInterposableLinkage(GV.getLinkage()) &&
2281 (GV.isDSOLocal() || GV.isImplicitDSOLocal());
2282 };
2283
2284 for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
2285 // Aliases without names cannot be referenced outside this module.
2286 if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
2287 J.setLinkage(GlobalValue::InternalLinkage);
2288
2289 if (deleteIfDead(J, NotDiscardableComdats)) {
2290 Changed = true;
2291 continue;
2292 }
2293
2294 // If the alias can change at link time, nothing can be done - bail out.
2295 if (!IsModuleLocal(J))
2296 continue;
2297
2298 Constant *Aliasee = J.getAliasee();
2299 GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee);
2300 // We can't trivially replace the alias with the aliasee if the aliasee is
2301 // non-trivial in some way. We also can't replace the alias with the aliasee
2302 // if the aliasee may be preemptible at runtime. On ELF, a non-preemptible
2303 // alias can be used to access the definition as if preemption did not
2304 // happen.
2305 // TODO: Try to handle non-zero GEPs of local aliasees.
2306 if (!Target || !IsModuleLocal(*Target))
2307 continue;
2308
2309 Target->removeDeadConstantUsers();
2310
2311 // Make all users of the alias use the aliasee instead.
2312 bool RenameTarget;
2313 if (!hasUsesToReplace(J, Used, RenameTarget))
2314 continue;
2315
2316 J.replaceAllUsesWith(Aliasee);
2317 ++NumAliasesResolved;
2318 Changed = true;
2319
2320 if (RenameTarget) {
2321 // Give the aliasee the name, linkage and other attributes of the alias.
2322 Target->takeName(&J);
2323 Target->setLinkage(J.getLinkage());
2324 Target->setDSOLocal(J.isDSOLocal());
2325 Target->setVisibility(J.getVisibility());
2326 Target->setDLLStorageClass(J.getDLLStorageClass());
2327
2328 if (Used.usedErase(&J))
2329 Used.usedInsert(Target);
2330
2331 if (Used.compilerUsedErase(&J))
2332 Used.compilerUsedInsert(Target);
2333 } else if (mayHaveOtherReferences(J, Used))
2334 continue;
2335
2336 // Delete the alias.
2337 M.eraseAlias(&J);
2338 ++NumAliasesRemoved;
2339 Changed = true;
2340 }
2341
2342 Used.syncVariablesAndSets();
2343
2344 return Changed;
2345}
2346
2347static Function *
2348 FindAtExitLibFunc(Module &M,
2349 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2350 LibFunc Func) {
2351 // Hack to get a default TLI before we have actual Function.
2352 auto FuncIter = M.begin();
2353 if (FuncIter == M.end())
2354 return nullptr;
2355 auto *TLI = &GetTLI(*FuncIter);
2356
2357 if (!TLI->has(Func))
2358 return nullptr;
2359
2360 Function *Fn = M.getFunction(TLI->getName(Func));
2361 if (!Fn)
2362 return nullptr;
2363
2364 // Now get the actual TLI for Fn.
2365 TLI = &GetTLI(*Fn);
2366
2367 // Make sure that the function has the correct prototype.
2368 LibFunc F;
2369 if (!TLI->getLibFunc(*Fn, F) || F != Func)
2370 return nullptr;
2371
2372 return Fn;
2373}
2374
2375/// Returns whether the given function is an empty C++ destructor or atexit
2376/// handler and can therefore be eliminated. Note that we assume that other
2377/// optimization passes have already simplified the code so we simply check for
2378/// 'ret'.
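// E.g. (illustrative) a destructor whose entry block is just
//   ret void
// (ignoring debug/pseudo instructions) counts as empty.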
2379static bool IsEmptyAtExitFunction(const Function &Fn) {
2380 // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
2381 // nounwind, but that doesn't seem worth doing.
2382 if (Fn.isDeclaration())
2383 return false;
2384
2385 for (const auto &I : Fn.getEntryBlock()) {
2386 if (I.isDebugOrPseudoInst())
2387 continue;
2388 if (isa<ReturnInst>(I))
2389 return true;
2390 break;
2391 }
2392 return false;
2393}
2394
2395static bool OptimizeEmptyGlobalAtExitDtors(Function *CXAAtExitFn, bool isCXX) {
2396 /// Itanium C++ ABI p3.3.5:
2397 ///
2398 /// After constructing a global (or local static) object, that will require
2399 /// destruction on exit, a termination function is registered as follows:
2400 ///
2401 /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
2402 ///
2403 /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
2404 /// call f(p) when DSO d is unloaded, before all such termination calls
2405 /// registered before this one. It returns zero if registration is
2406 /// successful, nonzero on failure.
2407
2408 // This pass will look for calls to __cxa_atexit or atexit where the function
2409 // is trivial and remove them.
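// For instance (hypothetical IR), a registration of an empty destructor
//   %r = call i32 @__cxa_atexit(ptr @empty.dtor, ptr @obj, ptr @__dso_handle)
// can be removed outright, since running @empty.dtor at program exit is a
// no-op.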
2410 bool Changed = false;
2411
2412 for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) {
2413 // We're only interested in calls. Theoretically, we could handle invoke
2414 // instructions as well, but neither llvm-gcc nor clang generate invokes
2415 // to __cxa_atexit.
2416 CallInst *CI = dyn_cast<CallInst>(U);
2417 if (!CI)
2418 continue;
2419
2420 Function *DtorFn =
2421 dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
2422 if (!DtorFn || !IsEmptyAtExitFunction(*DtorFn))
2423 continue;
2424
2425 // Just remove the call.
2426 CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
2427 CI->eraseFromParent();
2428
2429 if (isCXX)
2430 ++NumCXXDtorsRemoved;
2431 else
2432 ++NumAtExitRemoved;
2433
2434 Changed |= true;
2435 }
2436
2437 return Changed;
2438}
2439
2440 static Function *hasSideeffectFreeStaticResolution(GlobalIFunc &IF) {
2441 if (IF.isInterposable())
2442 return nullptr;
2443
2444 Function *Resolver = IF.getResolverFunction();
2445 if (!Resolver)
2446 return nullptr;
2447
2448 if (Resolver->isInterposable())
2449 return nullptr;
2450
2451 // Only handle functions that have been optimized into a single basic block.
2452 auto It = Resolver->begin();
2453 if (++It != Resolver->end())
2454 return nullptr;
2455
2456 BasicBlock &BB = Resolver->getEntryBlock();
2457
2458 if (any_of(BB, [](Instruction &I) { return I.mayHaveSideEffects(); }))
2459 return nullptr;
2460
2461 auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
2462 if (!Ret)
2463 return nullptr;
2464
2465 return dyn_cast<Function>(Ret->getReturnValue());
2466}
2467
2468/// Find IFuncs that have resolvers that always point at the same statically
2469/// known callee, and replace their callers with a direct call.
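// Illustrative example (hypothetical IR): a side-effect-free resolver
//   define ptr @foo.resolver() {
//     ret ptr @foo.impl
//   }
// always yields @foo.impl, so users of the ifunc can be redirected to it.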
2470 static bool OptimizeStaticIFuncs(Module &M) {
2471 bool Changed = false;
2472 for (GlobalIFunc &IF : M.ifuncs())
2473 if (Function *Callee = hasSideeffectFreeStaticResolution(IF))
2474 if (!IF.use_empty() &&
2475 (!Callee->isDeclaration() ||
2476 none_of(IF.users(), [](User *U) { return isa<GlobalAlias>(U); }))) {
2477 IF.replaceAllUsesWith(Callee);
2478 NumIFuncsResolved++;
2479 Changed = true;
2480 }
2481 return Changed;
2482}
2483
2484static bool
2485 DeleteDeadIFuncs(Module &M,
2486 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2487 bool Changed = false;
2488 for (GlobalIFunc &IF : make_early_inc_range(M.ifuncs()))
2489 if (deleteIfDead(IF, NotDiscardableComdats)) {
2490 NumIFuncsDeleted++;
2491 Changed = true;
2492 }
2493 return Changed;
2494}
2495
2496 // Follows the use-def chain of \p V backwards until it finds a Function,
2497 // in which case it collects it in \p Versions. Returns true on successful
2498 // use-def chain traversal, false otherwise.
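// E.g. (hypothetical IR) a resolver return value of
//   %r = select i1 %has.sve, ptr @foo._Msve, ptr @foo.default
// collects {@foo._Msve, @foo.default}; anything other than a function,
// select, or phi aborts the traversal.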
2499static bool
2500 collectVersions(Value *V, SmallVectorImpl<Function *> &Versions,
2501 function_ref<TargetTransformInfo &(Function &)> GetTTI) {
2502 if (auto *F = dyn_cast<Function>(V)) {
2503 if (!GetTTI(*F).isMultiversionedFunction(*F))
2504 return false;
2505 Versions.push_back(F);
2506 } else if (auto *Sel = dyn_cast<SelectInst>(V)) {
2507 if (!collectVersions(Sel->getTrueValue(), Versions, GetTTI))
2508 return false;
2509 if (!collectVersions(Sel->getFalseValue(), Versions, GetTTI))
2510 return false;
2511 } else if (auto *Phi = dyn_cast<PHINode>(V)) {
2512 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
2513 if (!collectVersions(Phi->getIncomingValue(I), Versions, GetTTI))
2514 return false;
2515 } else {
2516 // Unknown instruction type. Bail.
2517 return false;
2518 }
2519 return true;
2520}
2521
2522// Try to statically resolve calls to versioned functions when possible. First
2523// we identify the function versions which are associated with an IFUNC symbol.
2524// We do that by examining the resolver function of the IFUNC. Once we have
2525// collected all the function versions, we sort them in decreasing priority
2526// order. This is necessary for determining the most suitable callee version
2527// for each caller version. We then collect all the callsites to versioned
2528// functions. The static resolution is performed by comparing the feature sets
2529// between callers and callees. Specifically:
2530// * Start a walk over caller and callee lists simultaneously in order of
2531// decreasing priority.
2532// * Statically resolve calls from the current caller to the current callee,
2533// iff the caller feature bits are a superset of the callee feature bits.
2534// * For FMV callers, as long as the caller feature bits are a subset of the
2535// callee feature bits, advance to the next callee. This effectively prevents
2536// considering the current callee as a candidate for static resolution by
2537// following callers (explanation: preceding callers would not have been
2538// selected in a hypothetical runtime execution).
2539// * Advance to the next caller.
2540//
2541// Presentation in EuroLLVM2025:
2542// https://www.youtube.com/watch?v=k54MFimPz-A&t=867s
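// Worked example (hypothetical feature sets), callers and callees listed in
// decreasing priority: callers {C1={sve,bf16}, C2={sve}}, callees
// {F1={sve,bf16}, F2={sve}}. C1's calls resolve to F1 (F1's bits are a subset
// of C1's); the walk then advances past F1 because C1's bits are a subset of
// F1's, so C2's calls resolve to F2 rather than to the higher-priority F1.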
2543 static bool OptimizeNonTrivialIFuncs(
2544 Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
2545 bool Changed = false;
2546
2547 // Map containing the feature bits for a given function.
2548 DenseMap<Function *, APInt> FeatureMask;
2549 // Map containing all the function versions corresponding to an IFunc symbol.
2550 DenseMap<GlobalIFunc *, SmallVector<Function *>> VersionedFuncs;
2551 // Map containing the IFunc symbol a function is version of.
2552 DenseMap<Function *, GlobalIFunc *> VersionOf;
2553 // List of all the interesting IFuncs found in the module.
2554 SmallVector<GlobalIFunc *> IFuncs;
2555
2556 for (GlobalIFunc &IF : M.ifuncs()) {
2557 LLVM_DEBUG(dbgs() << "Examining IFUNC " << IF.getName() << "\n");
2558
2559 if (IF.isInterposable())
2560 continue;
2561
2562 Function *Resolver = IF.getResolverFunction();
2563 if (!Resolver)
2564 continue;
2565
2566 if (Resolver->isInterposable())
2567 continue;
2568
2569 SmallVector<Function *> Versions;
2570 // Discover the versioned functions.
2571 if (any_of(*Resolver, [&](BasicBlock &BB) {
2572 if (auto *Ret = dyn_cast_or_null<ReturnInst>(BB.getTerminator()))
2573 if (!collectVersions(Ret->getReturnValue(), Versions, GetTTI))
2574 return true;
2575 return false;
2576 }))
2577 continue;
2578
2579 if (Versions.empty())
2580 continue;
2581
2582 for (Function *V : Versions) {
2583 VersionOf.insert({V, &IF});
2584 auto [It, Inserted] = FeatureMask.try_emplace(V);
2585 if (Inserted)
2586 It->second = GetTTI(*V).getFeatureMask(*V);
2587 }
2588
2589 // Sort function versions in decreasing priority order.
2590 sort(Versions, [&](auto *LHS, auto *RHS) {
2591 return FeatureMask[LHS].ugt(FeatureMask[RHS]);
2592 });
2593
2594 IFuncs.push_back(&IF);
2595 VersionedFuncs.try_emplace(&IF, std::move(Versions));
2596 }
2597
2598 for (GlobalIFunc *CalleeIF : IFuncs) {
2599 SmallVector<Function *> NonFMVCallers;
2600 DenseSet<GlobalIFunc *> CallerIFuncs;
2601 DenseMap<Function *, SmallVector<CallBase *>> CallSites;
2602
2603 // Find the callsites.
2604 for (User *U : CalleeIF->users()) {
2605 if (auto *CB = dyn_cast<CallBase>(U)) {
2606 if (CB->getCalledOperand() == CalleeIF) {
2607 Function *Caller = CB->getFunction();
2608 GlobalIFunc *CallerIF = nullptr;
2609 TargetTransformInfo &TTI = GetTTI(*Caller);
2610 bool CallerIsFMV = TTI.isMultiversionedFunction(*Caller);
2611 // The caller is a version of a known IFunc.
2612 if (auto It = VersionOf.find(Caller); It != VersionOf.end())
2613 CallerIF = It->second;
2614 else if (!CallerIsFMV && OptimizeNonFMVCallers) {
2615 // The caller is non-FMV.
2616 auto [It, Inserted] = FeatureMask.try_emplace(Caller);
2617 if (Inserted)
2618 It->second = TTI.getFeatureMask(*Caller);
2619 } else
2620 // The caller is none of the above, skip.
2621 continue;
2622 auto [It, Inserted] = CallSites.try_emplace(Caller);
2623 if (Inserted) {
2624 if (CallerIsFMV)
2625 CallerIFuncs.insert(CallerIF);
2626 else
2627 NonFMVCallers.push_back(Caller);
2628 }
2629 It->second.push_back(CB);
2630 }
2631 }
2632 }
2633
2634 if (CallSites.empty())
2635 continue;
2636
2637 LLVM_DEBUG(dbgs() << "Statically resolving calls to function "
2638 << CalleeIF->getResolverFunction()->getName() << "\n");
2639
2640 // The complexity of this algorithm is linear: O(NumCallers + NumCallees)
2641 // if NumCallers > MaxIFuncVersions || NumCallees > MaxIFuncVersions,
2642 // otherwise it is cubic: O((NumCallers ^ 2) x NumCallees).
2643 auto staticallyResolveCalls = [&](ArrayRef<Function *> Callers,
2644 ArrayRef<Function *> Callees,
2645 bool CallerIsFMV) {
2646 bool AllowExpensiveChecks = CallerIsFMV &&
2647 Callers.size() <= MaxIFuncVersions &&
2648 Callees.size() <= MaxIFuncVersions;
2649 // Index to the highest callee candidate.
2650 unsigned J = 0;
2651
2652 for (unsigned I = 0, E = Callers.size(); I < E; ++I) {
2653 // There are no callee candidates left.
2654 if (J == Callees.size())
2655 break;
2656
2657 Function *Caller = Callers[I];
2658 APInt CallerBits = FeatureMask[Caller];
2659
2660 // Compare the feature bits of the best callee candidate with all the
2661 // caller versions preceding the current one. For each prior caller
2662 // discard feature bits that are known to be available in the current
2663 // caller. As long as the known missing feature bits are a subset of the
2664 // callee feature bits, advance to the next callee and start over.
2665 auto eliminateAvailableFeatures = [&](unsigned BestCandidate) {
2666 unsigned K = 0;
2667 while (K < I && BestCandidate < Callees.size()) {
2668 APInt MissingBits = FeatureMask[Callers[K]] & ~CallerBits;
2669 if (MissingBits.isSubsetOf(FeatureMask[Callees[BestCandidate]])) {
2670 ++BestCandidate;
2671 // Start over.
2672 K = 0;
2673 } else
2674 ++K;
2675 }
2676 return BestCandidate;
2677 };
2678
2679 unsigned BestCandidate =
2680 AllowExpensiveChecks ? eliminateAvailableFeatures(J) : J;
2681 // No callee candidate was found for this caller.
2682 if (BestCandidate == Callees.size())
2683 continue;
2684
2685 LLVM_DEBUG(dbgs() << " Examining "
2686 << (CallerIsFMV ? "FMV" : "regular") << " caller "
2687 << Caller->getName() << "\n");
2688
2689 Function *Callee = Callees[BestCandidate];
2690 APInt CalleeBits = FeatureMask[Callee];
2691
2692 // Statically resolve calls from the current caller to the current
2693 // callee, iff the caller feature bits are a superset of the callee
2694 // feature bits.
2695 if (CalleeBits.isSubsetOf(CallerBits)) {
2696 // Not all caller versions are necessarily users of the callee IFUNC.
2697 if (auto It = CallSites.find(Caller); It != CallSites.end()) {
2698 for (CallBase *CS : It->second) {
2699 LLVM_DEBUG(dbgs() << " Redirecting call " << Caller->getName()
2700 << " -> " << Callee->getName() << "\n");
2701 CS->setCalledOperand(Callee);
2702 }
2703 Changed = true;
2704 }
2705 }
2706
2707 // Nothing else to do about non-FMV callers.
2708 if (!CallerIsFMV)
2709 continue;
2710
2711 // For FMV callers, as long as the caller feature bits are a subset of
2712 // the callee feature bits, advance to the next callee. This effectively
2713 // prevents considering the current callee as a candidate for static
2714 // resolution by following callers.
2715 while (CallerBits.isSubsetOf(FeatureMask[Callees[J]]) &&
2716 ++J < Callees.size())
2717 ;
2718 }
2719 };
2720
2721 auto &Callees = VersionedFuncs[CalleeIF];
2722
2723 // Optimize non-FMV calls.
2724 if (OptimizeNonFMVCallers)
2725 staticallyResolveCalls(NonFMVCallers, Callees, /*CallerIsFMV=*/false);
2726
2727 // Optimize FMV calls.
2728 for (GlobalIFunc *CallerIF : CallerIFuncs) {
2729 auto &Callers = VersionedFuncs[CallerIF];
2730 staticallyResolveCalls(Callers, Callees, /*CallerIsFMV=*/true);
2731 }
2732
2733 if (CalleeIF->use_empty() ||
2734 all_of(CalleeIF->users(), [](User *U) { return isa<GlobalAlias>(U); }))
2735 NumIFuncsResolved++;
2736 }
2737 return Changed;
2738}
2739
2740static bool
2741 optimizeGlobalsInModule(Module &M, const DataLayout &DL,
2742 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2743 function_ref<TargetTransformInfo &(Function &)> GetTTI,
2744 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2745 function_ref<DominatorTree &(Function &)> LookupDomTree,
2746 function_ref<void(Function &F)> ChangedCFGCallback,
2747 function_ref<void(Function &F)> DeleteFnCallback) {
2748 SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
2749 bool Changed = false;
2750 bool LocalChange = true;
2751 std::optional<uint32_t> FirstNotFullyEvaluatedPriority;
2752
2753 while (LocalChange) {
2754 LocalChange = false;
2755
2756 NotDiscardableComdats.clear();
2757 for (const GlobalVariable &GV : M.globals())
2758 if (const Comdat *C = GV.getComdat())
2759 if (!GV.isDiscardableIfUnused() || !GV.use_empty())
2760 NotDiscardableComdats.insert(C);
2761 for (Function &F : M)
2762 if (const Comdat *C = F.getComdat())
2763 if (!F.isDefTriviallyDead())
2764 NotDiscardableComdats.insert(C);
2765 for (GlobalAlias &GA : M.aliases())
2766 if (const Comdat *C = GA.getComdat())
2767 if (!GA.isDiscardableIfUnused() || !GA.use_empty())
2768 NotDiscardableComdats.insert(C);
2769
2770 // Delete functions that are trivially dead, ccc -> fastcc
2771 LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
2772 NotDiscardableComdats, ChangedCFGCallback,
2773 DeleteFnCallback);
2774
2775 // Optimize global_ctors list.
2776 LocalChange |=
2777 optimizeGlobalCtorsList(M, [&](uint32_t Priority, Function *F) {
2778 if (FirstNotFullyEvaluatedPriority &&
2779 *FirstNotFullyEvaluatedPriority != Priority)
2780 return false;
2781 bool Evaluated = EvaluateStaticConstructor(F, DL, &GetTLI(*F));
2782 if (!Evaluated)
2783 FirstNotFullyEvaluatedPriority = Priority;
2784 return Evaluated;
2785 });
2786
2787 // Optimize non-address-taken globals.
2788 LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
2789 NotDiscardableComdats);
2790
2791 // Resolve aliases, when possible.
2792 LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
2793
2794 // Try to remove trivial global destructors if they are not removed
2795 // already.
2796 if (Function *CXAAtExitFn =
2797 FindAtExitLibFunc(M, GetTLI, LibFunc_cxa_atexit))
2798 LocalChange |= OptimizeEmptyGlobalAtExitDtors(CXAAtExitFn, true);
2799
2800 if (Function *AtExitFn = FindAtExitLibFunc(M, GetTLI, LibFunc_atexit))
2801 LocalChange |= OptimizeEmptyGlobalAtExitDtors(AtExitFn, false);
2802
2803 // Optimize IFuncs whose callees are statically known.
2804 LocalChange |= OptimizeStaticIFuncs(M);
2805
2806 // Optimize IFuncs based on the target features of the caller.
2807 LocalChange |= OptimizeNonTrivialIFuncs(M, GetTTI);
2808
2809 // Remove any IFuncs that are now dead.
2810 LocalChange |= DeleteDeadIFuncs(M, NotDiscardableComdats);
2811
2812 Changed |= LocalChange;
2813 }
2814
2815 // TODO: Move all global ctors functions to the end of the module for code
2816 // layout.
2817
2818 return Changed;
2819}
2820
2821 PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
2822 auto &DL = M.getDataLayout();
2823 auto &FAM =
2824 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2825 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{
2826 return FAM.getResult<DominatorTreeAnalysis>(F);
2827 };
2828 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2829 return FAM.getResult<TargetLibraryAnalysis>(F);
2830 };
2831 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
2832 return FAM.getResult<TargetIRAnalysis>(F);
2833 };
2834
2835 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
2836 return FAM.getResult<BlockFrequencyAnalysis>(F);
2837 };
2838 auto ChangedCFGCallback = [&FAM](Function &F) {
2839 FAM.invalidate(F, PreservedAnalyses::none());
2840 };
2841 auto DeleteFnCallback = [&FAM](Function &F) { FAM.clear(F, F.getName()); };
2842
2843 if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
2844 ChangedCFGCallback, DeleteFnCallback))
2845 return PreservedAnalyses::all();
2846
2847 PreservedAnalyses PA = PreservedAnalyses::none();
2848 // We made sure to clear analyses for deleted functions.
2849 PA.preserve<FunctionAnalysisManagerModuleProxy>();
2850 // The only place we modify the CFG is when calling
2851 // removeUnreachableBlocks(), but there we make sure to invalidate analyses
2852 // for modified functions.
2853 PA.preserveSet<CFGAnalyses>();
2854 return PA;
2855}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file contains constants used for implementing Dwarf debug support.
static bool IsSafeComputationToRemove(Value *V, function_ref< TargetLibraryInfo &(Function &)> GetTLI)
Given a value that is stored to a global but never read, determine whether it's safe to remove the st...
static Function * FindAtExitLibFunc(Module &M, function_ref< TargetLibraryInfo &(Function &)> GetTLI, LibFunc Func)
static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, const DataLayout &DL, function_ref< TargetLibraryInfo &(Function &)> GetTLI)
static Function * hasSideeffectFreeStaticResolution(GlobalIFunc &IF)
static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV, CallInst *CI, const DataLayout &DL, TargetLibraryInfo *TLI)
If we have a global that is only initialized with a fixed size allocation try to transform the progra...
static void ConstantPropUsersOf(Value *V, const DataLayout &DL, TargetLibraryInfo *TLI)
Walk the use list of V, constant folding all of the instructions that are foldable.
static bool OptimizeStaticIFuncs(Module &M)
Find IFuncs that have resolvers that always point at the same statically known callee,...
static bool hasOnlyColdCalls(Function &F, function_ref< BlockFrequencyInfo &(Function &)> GetBFI, ChangeableCCCacheTy &ChangeableCCCache)
static bool allUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV)
Return true if all uses of any loads from GV will trap if the loaded value is null.
static bool hasChangeableCCImpl(Function *F)
Return true if this is a calling convention that we'd like to change.
static bool AllUsesOfValueWillTrapIfNull(const Value *V, SmallPtrSetImpl< const PHINode * > &PHIs)
Return true if all users of the specified value will trap if the value is dynamically null.
static GlobalVariable * OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI, uint64_t AllocSize, Constant *InitVal, const DataLayout &DL, TargetLibraryInfo *TLI)
This function takes the specified global variable, and transforms the program as if it always contain...
static bool collectVersions(Value *V, SmallVectorImpl< Function * > &Versions, function_ref< TargetTransformInfo &(Function &)> GetTTI)
static bool IsEmptyAtExitFunction(const Function &Fn)
Returns whether the given function is an empty C++ destructor or atexit handler and can therefore be ...
static bool collectSRATypes(DenseMap< uint64_t, GlobalPart > &Parts, GlobalVariable *GV, const DataLayout &DL)
Look at all uses of the global and determine which (offset, type) pairs it can be split into.
static bool valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI, const GlobalVariable *GV)
Scan the use-list of GV checking to make sure that there are no complex uses of GV.
static bool OptimizeFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> GetTLI, function_ref< TargetTransformInfo &(Function &)> GetTTI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI, function_ref< DominatorTree &(Function &)> LookupDomTree, SmallPtrSetImpl< const Comdat * > &NotDiscardableComdats, function_ref< void(Function &F)> ChangedCFGCallback, function_ref< void(Function &F)> DeleteFnCallback)
static bool DeleteDeadIFuncs(Module &M, SmallPtrSetImpl< const Comdat * > &NotDiscardableComdats)
static void RemoveAttribute(Function *F, Attribute::AttrKind A)
static bool hasChangeableCC(Function *F, ChangeableCCCacheTy &ChangeableCCCache)
static bool deleteIfDead(GlobalValue &GV, SmallPtrSetImpl< const Comdat * > &NotDiscardableComdats, function_ref< void(Function &)> DeleteFnCallback=nullptr)
static void RemovePreallocated(Function *F)
static cl::opt< bool > OptimizeNonFMVCallers("optimize-non-fmv-callers", cl::desc("Statically resolve calls to versioned " "functions from non-versioned callers."), cl::init(true), cl::Hidden)
static bool processGlobal(GlobalValue &GV, function_ref< TargetTransformInfo &(Function &)> GetTTI, function_ref< TargetLibraryInfo &(Function &)> GetTLI, function_ref< DominatorTree &(Function &)> LookupDomTree)
Analyze the specified global variable and optimize it if possible.
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, uint64_t FragmentOffsetInBits, uint64_t FragmentSizeInBits, uint64_t VarSize)
Copy over the debug info for a variable to its SRA replacements.
static cl::opt< bool > EnableColdCCStressTest("enable-coldcc-stress-test", cl::desc("Enable stress test of coldcc by adding " "calling conv to all internal functions."), cl::init(false), cl::Hidden)
static bool OptimizeGlobalAliases(Module &M, SmallPtrSetImpl< const Comdat * > &NotDiscardableComdats)
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal)
At this point, we have learned that the only two values ever stored into GV are its initializer and O...
static void ChangeCalleesToFastCall(Function *F)
Walk all of the direct calls of the specified function, changing them to FastCC.
static bool hasMustTailCallers(Function *F)
static bool OptimizeNonTrivialIFuncs(Module &M, function_ref< TargetTransformInfo &(Function &)> GetTTI)
static bool OptimizeGlobalVars(Module &M, function_ref< TargetTransformInfo &(Function &)> GetTTI, function_ref< TargetLibraryInfo &(Function &)> GetTLI, function_ref< DominatorTree &(Function &)> LookupDomTree, SmallPtrSetImpl< const Comdat * > &NotDiscardableComdats)
static void allUsesOfLoadAndStores(GlobalVariable *GV, SmallVector< Value *, 4 > &Uses)
Get all the loads/store uses for global variable GV.
static bool OptimizeEmptyGlobalAtExitDtors(Function *CXAAtExitFn, bool isCXX)
static bool mayHaveOtherReferences(GlobalValue &GV, const LLVMUsed &U)
static void changeCallSitesToColdCC(Function *F)
static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs, Attribute::AttrKind A)
static bool hasInvokeCallers(Function *F)
static void setUsedInitializer(GlobalVariable &V, const SmallPtrSetImpl< GlobalValue * > &Init)
static cl::opt< unsigned > MaxIFuncVersions("max-ifunc-versions", cl::Hidden, cl::init(5), cl::desc("Maximum number of caller/callee versions that is allowed for " "using the expensive (cubic) static resolution algorithm."))
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, const DataLayout &DL, function_ref< TargetLibraryInfo &(Function &)> GetTLI)
The specified global has only one non-null value stored into it.
static bool isValidCandidateForColdCC(Function &F, function_ref< BlockFrequencyInfo &(Function &)> GetBFI, const std::vector< Function * > &AllCallsCold)
static cl::opt< int > ColdCCRelFreq("coldcc-rel-freq", cl::Hidden, cl::init(2), cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a call site to be considered cold for enabling " "coldcc"))
static bool optimizeGlobalsInModule(Module &M, const DataLayout &DL, function_ref< TargetLibraryInfo &(Function &)> GetTLI, function_ref< TargetTransformInfo &(Function &)> GetTTI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI, function_ref< DominatorTree &(Function &)> LookupDomTree, function_ref< void(Function &F)> ChangedCFGCallback, function_ref< void(Function &F)> DeleteFnCallback)
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, TargetLibraryInfo *TLI)
Evaluate static constructors in the function, if we can.
static bool CleanupConstantGlobalUsers(GlobalVariable *GV, const DataLayout &DL)
We just marked GV constant.
SmallDenseMap< Function *, bool, 8 > ChangeableCCCacheTy
static bool isLeakCheckerRoot(GlobalVariable *GV)
Is this global variable possibly used by a leak checker as a root?
static bool forwardStoredOnceStore(GlobalVariable *GV, const StoreInst *StoredOnceStore, function_ref< DominatorTree &(Function &)> LookupDomTree)
static int compareNames(Constant *const *A, Constant *const *B)
static bool CleanupPointerRootUsers(GlobalVariable *GV, function_ref< TargetLibraryInfo &(Function &)> GetTLI)
This GV is a pointer root.
static bool isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV, function_ref< DominatorTree &(Function &)> LookupDomTree)
static bool processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, function_ref< TargetTransformInfo &(Function &)> GetTTI, function_ref< TargetLibraryInfo &(Function &)> GetTLI, function_ref< DominatorTree &(Function &)> LookupDomTree)
Analyze the specified global variable and optimize it if possible.
static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U, bool &RenameTarget)
static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV)
static GlobalVariable * SRAGlobal(GlobalVariable *GV, const DataLayout &DL)
Perform scalar replacement of aggregates on the specified global variable.
static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U)
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
uint64_t IntrinsicInst * II
#define P(N)
FunctionAnalysisManager FAM
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
AttrKind
This enumeration lists the attributes that can be associated with parameters, function results,...
Definition Attributes.h:88
LLVM Basic Block Representation.
Definition BasicBlock.h:62
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
static LLVM_ABI CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, InsertPosition InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isMustTailCall() const
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
bool isSigned() const
Definition InstrTypes.h:930
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1130
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition Constants.h:1284
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
This is an important base class in LLVM.
Definition Constant.h:43
const Constant * stripPointerCasts() const
Definition Constant.h:222
LLVM_ABI void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
DWARF expression.
LLVM_ABI bool extractIfOffset(int64_t &Offset) const
If this is a constant offset, extract it.
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
static LLVM_ABI DIExpression * prependOpcodes(const DIExpression *Expr, SmallVectorImpl< uint64_t > &Ops, bool StackValue=false, bool EntryValue=false)
Prepend DIExpr with the given opcodes and optionally turn it into a stack value.
A pair of DIGlobalVariable and DIExpression.
uint64_t getSizeInBits() const
Base class for variables.
DIType * getType() const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
static DebugLoc getCompilerGenerated()
Definition DebugLoc.h:162
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
bool erase(const KeyT &Val)
Definition DenseMap.h:330
unsigned size() const
Definition DenseMap.h:110
bool empty() const
Definition DenseMap.h:109
iterator begin()
Definition DenseMap.h:78
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
This class evaluates LLVM IR, producing the Constant representing each SSA instruction.
Definition Evaluator.h:37
DenseMap< GlobalVariable *, Constant * > getMutatedInitializers() const
Definition Evaluator.h:102
bool EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl< Constant * > &ActualArgs)
Evaluate a call to function F, returning true if successful, false if we can't evaluate it.
const SmallPtrSetImpl< GlobalVariable * > & getInvariants() const
Definition Evaluator.h:109
const BasicBlock & getEntryBlock() const
Definition Function.h:807
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:244
const Function & getFunction() const
Definition Function.h:164
iterator begin()
Definition Function.h:851
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
const Constant * getAliasee() const
Definition GlobalAlias.h:87
LLVM_ABI const Function * getResolverFunction() const
Definition Globals.cpp:665
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
bool isDSOLocal() const
bool isImplicitDSOLocal() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:328
LinkageTypes getLinkage() const
void setUnnamedAddr(UnnamedAddr Val)
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
LLVM_ABI const Comdat * getComdat() const
Definition Globals.cpp:201
ThreadLocalMode getThreadLocalMode() const
void setLinkage(LinkageTypes LT)
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of.
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
Definition Globals.cpp:93
PointerType * getType() const
Global values are always pointers.
LLVM_ABI bool isInterposable() const
Return true if this global's definition can be substituted with an arbitrary definition at link time or load time.
Definition Globals.cpp:107
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:132
static bool isInterposableLinkage(LinkageTypes Linkage)
Whether the definition of this global may be replaced by something non-equivalent at link time.
bool hasGlobalUnnamedAddr() const
UnnamedAddr getUnnamedAddr() const
static bool isWeakForLinker(LinkageTypes Linkage)
Whether the definition of this global may be replaced at link time.
static bool isDiscardableIfUnused(LinkageTypes Linkage)
Whether the definition of this global may be discarded if it is not used in its compilation unit.
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
@ AppendingLinkage
Special purpose, only applies to global arrays.
Definition GlobalValue.h:59
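These linkage predicates are the safety checks gating nearly every transform in this file: a definition that may be interposed or replaced at link time cannot be treated as final. A hedged sketch of the two usual guards:
  // Is it sound to reason about this definition as if it were the only one?
  bool canAnalyzeDefinition(const GlobalValue &GV) {
    return !GV.isDeclaration() && !GV.isInterposable();
  }

  // Drop a global nobody references, if its linkage permits discarding it.
  void dropIfUnused(GlobalVariable &GV) {
    if (GV.use_empty() &&
        GlobalValue::isDiscardableIfUnused(GV.getLinkage()))
      GV.eraseFromParent();
  }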
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if InitVal is null.
Definition Globals.cpp:524
bool isExternallyInitialized() const
MaybeAlign getAlign() const
Returns the alignment of the given variable.
void setConstant(bool Val)
LLVM_ABI void copyAttributesFrom(const GlobalVariable *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a GlobalVariable) from the GlobalVariable Src to this one.
Definition Globals.cpp:553
LLVM_ABI void getDebugInfo(SmallVectorImpl< DIGlobalVariableExpression * > &GVs) const
Fill the vector with all debug info attachments.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the program.
LLVM_ABI void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Globals.cpp:520
LLVM_ABI void addDebugInfo(DIGlobalVariableExpression *GV)
Attach a DIGlobalVariableExpression.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
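These GlobalVariable mutators are the endpoints of the mark-constant transform counted by NumMarked: once analysis proves no store ever changes the value, the pass records that fact. A minimal sketch with the proof itself elided:
  // Assumes analysis already showed GV is never written after its
  // initializer is established.
  if (!GV->isConstant() && !GV->isExternallyInitialized() &&
      !GV->isInterposable()) {
    GV->setConstant(true);
    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  }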
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Definition IRBuilder.h:2788
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
A wrapper class for inspecting calls to intrinsic functions.
Invoke instruction.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
LLVMContext & getContext() const
Definition Metadata.h:1242
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void insertGlobalVariable(GlobalVariable *GV)
Insert global variable GV at the end of the global variable list and take ownership.
Definition Module.h:568
unsigned getAddressSpace() const
Return the address space of the Pointer type.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
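The PreservedAnalyses entries follow the standard new-pass-manager contract: return all() when nothing changed, otherwise report only what survives. A sketch of that convention (ExamplePass and optimizeModule are hypothetical):
  PreservedAnalyses ExamplePass::run(Module &M, ModuleAnalysisManager &AM) {
    if (!optimizeModule(M)) // hypothetical helper; true means IR changed
      return PreservedAnalyses::all();
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>(); // e.g. claim only CFG-level facts survive
    return PA;
  }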
static LLVM_ABI void SalvageDebugInfo(const Constant &C)
Replace all uses of the constant with Undef in debug info metadata.
Definition Metadata.cpp:332
Interface for looking up the initializer for a variable name, used by Init::resolveReferences.
Definition Record.h:2199
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all small sizes.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void reserve(size_type N)
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
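SmallPtrSet and SmallVector pair up as the visited-set/worklist idiom behind use-graph walks such as analyzeGlobal. A self-contained sketch:
  void walkUsers(GlobalVariable *GV) {
    SmallPtrSet<User *, 8> Visited;
    SmallVector<User *, 8> Worklist(GV->users());
    while (!Worklist.empty()) {
      User *U = Worklist.pop_back_val();
      if (!Visited.insert(U).second)
        continue; // insert() returned false: already processed this user
      append_range(Worklist, U->users()); // enqueue transitive users
    }
  }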
An instruction for storing to memory.
Value * getValueOperand()
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
int compare(StringRef RHS) const
compare - Compare two strings; the result is negative, zero, or positive if this string is lexicographically less than, equal to, or greater than RHS.
Definition StringRef.h:179
Class to represent struct types.
ArrayRef< Type * > elements() const
bool isOpaque() const
Return true if this is a type with an identity that has no body specified yet.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition Type.h:74
@ ScalableVectorTyID
Scalable SIMD vector type.
Definition Type.h:76
@ StructTyID
Structures.
Definition Type.h:73
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition Type.h:75
@ PointerTyID
Pointers.
Definition Type.h:72
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:296
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:293
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
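Type::getInt1Ty is the key ingredient of the shrink-global-to-boolean transform counted by NumShrunkToBool: when only two distinct values are ever stored, the global can be replaced by an i1 flag. A sketch of creating the replacement, mirroring the shape of that transform:
  GlobalVariable *NewGV = new GlobalVariable(
      Type::getInt1Ty(GV->getContext()), /*isConstant=*/false,
      GlobalValue::InternalLinkage, ConstantInt::getFalse(GV->getContext()),
      GV->getName() + ".b", GV->getThreadLocalMode(), GV->getAddressSpace());
  M.insertGlobalVariable(NewGV); // M is the enclosing Module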
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI void set(Value *Val)
Definition Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Use * op_iterator
Definition User.h:279
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
use_iterator use_begin()
Definition Value.h:364
User * user_back()
Definition Value.h:412
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:701
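stripAndAccumulateConstantOffsets is how a pass reduces an arbitrary chain of constant GEPs over a global to a plain (base, byte offset) pair. A hedged sketch, assuming DL is the module's DataLayout:
  std::optional<uint64_t> getByteOffsetFrom(const Value *Ptr,
                                            const GlobalVariable *GV,
                                            const DataLayout &DL) {
    APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
    const Value *Base = Ptr->stripAndAccumulateConstantOffsets(
        DL, Offset, /*AllowNonInbounds=*/true);
    if (Base != GV)
      return std::nullopt; // not a constant-offset access into this global
    return Offset.getZExtValue();
  }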
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
iterator_range< use_iterator > uses()
Definition Value.h:380
user_iterator_impl< User > user_iterator
Definition Value.h:391
bool hasName() const
Definition Value.h:262
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
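replaceAllUsesWith and takeName combine into the replace-and-rename idiom that finishes most rewrites in this file:
  GV->replaceAllUsesWith(NewGV); // redirect every use to the replacement
  NewGV->takeName(GV);           // keep the original, user-visible name
  GV->eraseFromParent();         // the old global is now unreferenced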
This class represents zero extension of integer types.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is not commonly executed.
Definition CallingConv.h:47
@ X86_ThisCall
Similar to X86_StdCall.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
@ Offset
Definition DWP.cpp:532
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value, return said value in the requested type.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
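RecursivelyDeleteTriviallyDeadInstructions is the cleanup companion to such rewrites: once a load or store is folded away, its now-dead operand chain should go too. A one-line sketch, where I is an instruction whose result just lost its last use:
  // Deletes I if it is trivially dead, then recursively deletes any of its
  // operands that become dead as a result.
  RecursivelyDeleteTriviallyDeadInstructions(I, &TLI);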
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI Constant * ConstantFoldInstruction(const Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are required to preserve beyond the effect of allocating a new object.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2148
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition STLExtras.h:632
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:402
LLVM_ABI bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
LLVM_ABI Constant * ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty, const DataLayout &DL)
If C is a uniform value where all bits are the same (either all zero, all ones, all undef or all poison), return the corresponding uniform value in the new type.
bool isSafeToDestroyConstant(const Constant *C)
It is safe to destroy a constant iff it is itself only used by other constants.
LLVM_ABI Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition Local.cpp:1566
bool optimizeGlobalCtorsList(Module &M, function_ref< bool(uint32_t, Function *)> ShouldRemove)
Call "ShouldRemove" for every entry in M's global_ctor list and remove the entries for which it returns true.
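optimizeGlobalCtorsList drives the NumCtorsEvaluated statistic: every llvm.global_ctors entry is offered to the callback, and approved entries are removed. A sketch with a hypothetical predicate wasFullyEvaluated:
  bool Changed = optimizeGlobalCtorsList(
      M, [&](uint32_t Priority, Function *F) {
        // Remove a ctor once evaluation proved it has no runtime effect.
        return F && wasFullyEvaluated(F);
      });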
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an address space.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isPointerTy(const Type *T)
Definition SPIRVUtils.h:359
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
LLVM_ABI Constant * ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL)
Extract value of C at the given Offset reinterpreted as Ty.
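ConstantFoldLoadFromConst is what turns a load from a proven-constant global at a known offset back into a plain Constant. A sketch continuing the byte-offset computation sketched earlier:
  void foldLoad(LoadInst *LI, GlobalVariable *GV, const APInt &Offset,
                const DataLayout &DL) {
    if (Constant *Val = ConstantFoldLoadFromConst(GV->getInitializer(),
                                                  LI->getType(), Offset, DL)) {
      LI->replaceAllUsesWith(Val); // the load result is now a constant
      LI->eraseFromParent();
    }
  }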
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
TargetTransformInfo TTI
DWARFExpression::Operation Op
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not trivially dead. These will be ignored.
Definition Local.cpp:548
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1973
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates or reallocates memory (either malloc, calloc, realloc, or strdup like).
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1594
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal.address from the specified value, returning the original object being addressed.
LLVM_ABI bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Remove all blocks that can not be reached from the function's entry.
Definition Local.cpp:2883
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
LLVM_ABI GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that global in Vec and return the global itself.
Definition Module.cpp:870
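collectUsedGlobalVariables matters to global deletion because anything named in llvm.used or llvm.compiler.used must survive even with zero IR uses. A sketch of building that keep-alive set:
  SmallVector<GlobalValue *, 8> Used;
  collectUsedGlobalVariables(M, Used, /*CompilerUsed=*/false);
  collectUsedGlobalVariables(M, Used, /*CompilerUsed=*/true);
  SmallPtrSet<GlobalValue *, 8> KeepAlive(Used.begin(), Used.end());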
Part of the global at a specific offset, which is only accessed through loads and stores with the given type.
Constant * Initializer
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
As we analyze each global or thread-local variable, keep track of some information about it.
@ InitializerStored
This global is stored to, but the only thing stored is the constant it was initialized with.
@ StoredOnce
This global is stored to, but only its initializer and one other value is ever stored to it.
static bool analyzeGlobal(const Value *V, GlobalStatus &GS)
Look at all uses of the global and fill in the GlobalStatus structure.
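analyzeGlobal is the analysis feeding every transform in this file; it returns true when the global is too complex to reason about (for example, its address escapes). A sketch of the classify-then-act shape:
  bool processGlobal(GlobalVariable *GV) {
    GlobalStatus GS;
    if (GlobalStatus::analyzeGlobal(GV, GS))
      return false; // address taken or otherwise opaque: give up
    if (GS.StoredType == GlobalStatus::InitializerStored)
      GV->setConstant(true); // stores never change the observed value
    return true;
  }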
Various options to control the behavior of getObjectSize.
Function object to check whether the first component of a container supported by std::get (like std::pair and std::tuple) compares less than the first component of another container.
Definition STLExtras.h:1437