LLVM 20.0.0git
GlobalMerge.cpp
Go to the documentation of this file.
1//===- GlobalMerge.cpp - Internal globals merging -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges globals with internal linkage into one. This way all the
10// globals which were merged into a biggest one can be addressed using offsets
11// from the same base pointer (no need for separate base pointer for each of the
12// global). Such a transformation can significantly reduce the register pressure
13// when many globals are involved.
14//
15// For example, consider the code which touches several global variables at
16// once:
17//
18// static int foo[N], bar[N], baz[N];
19//
20// for (i = 0; i < N; ++i) {
21// foo[i] = bar[i] * baz[i];
22// }
23//
24// On ARM the addresses of 3 arrays should be kept in the registers, thus
25// this code has quite large register pressure (loop body):
26//
27// ldr r1, [r5], #4
28// ldr r2, [r6], #4
29// mul r1, r2, r1
30// str r1, [r0], #4
31//
32// Pass converts the code to something like:
33//
34// static struct {
35// int foo[N];
36// int bar[N];
37// int baz[N];
38// } merged;
39//
40// for (i = 0; i < N; ++i) {
41// merged.foo[i] = merged.bar[i] * merged.baz[i];
42// }
43//
44// and in ARM code this becomes:
45//
46// ldr r0, [r5, #40]
47// ldr r1, [r5, #80]
48// mul r0, r1, r0
49// str r0, [r5], #4
50//
51// note that we saved 2 registers here almostly "for free".
52//
53// However, merging globals can have tradeoffs:
54// - it confuses debuggers, tools, and users
55// - it makes linker optimizations less useful (order files, LOHs, ...)
56// - it forces usage of indexed addressing (which isn't necessarily "free")
57// - it can increase register pressure when the uses are disparate enough.
58//
59// We use heuristics to discover the best global grouping we can (cf cl::opts).
60//
61// ===---------------------------------------------------------------------===//
62
64#include "llvm/ADT/BitVector.h"
65#include "llvm/ADT/DenseMap.h"
66#include "llvm/ADT/MapVector.h"
67#include "llvm/ADT/SetVector.h"
69#include "llvm/ADT/Statistic.h"
70#include "llvm/ADT/StringRef.h"
71#include "llvm/ADT/Twine.h"
72#include "llvm/CodeGen/Passes.h"
73#include "llvm/IR/BasicBlock.h"
74#include "llvm/IR/Constants.h"
75#include "llvm/IR/DataLayout.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/Instruction.h"
82#include "llvm/IR/Module.h"
83#include "llvm/IR/Type.h"
84#include "llvm/IR/Use.h"
85#include "llvm/IR/User.h"
87#include "llvm/MC/SectionKind.h"
88#include "llvm/Pass.h"
91#include "llvm/Support/Debug.h"
96#include <algorithm>
97#include <cassert>
98#include <cstddef>
99#include <cstdint>
100#include <string>
101#include <vector>
102
103using namespace llvm;
104
105#define DEBUG_TYPE "global-merge"
106
107// FIXME: This is only useful as a last-resort way to disable the pass.
108static cl::opt<bool>
109EnableGlobalMerge("enable-global-merge", cl::Hidden,
110 cl::desc("Enable the global merge pass"),
111 cl::init(true));
112
114GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
115 cl::desc("Set maximum offset for global merge pass"),
116 cl::init(0));
117
119 "global-merge-group-by-use", cl::Hidden,
120 cl::desc("Improve global merge pass to look at uses"), cl::init(true));
121
123 "global-merge-ignore-single-use", cl::Hidden,
124 cl::desc("Improve global merge pass to ignore globals only used alone"),
125 cl::init(true));
126
127static cl::opt<bool>
128EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
129 cl::desc("Enable global merge pass on constants"),
130 cl::init(false));
131
132// FIXME: this could be a transitional option, and we probably need to remove
133// it if only we are sure this optimization could always benefit all targets.
135EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
136 cl::desc("Enable global merge pass on external linkage"));
137
139 GlobalMergeMinDataSize("global-merge-min-data-size",
140 cl::desc("The minimum size in bytes of each global "
141 "that should considered in merging."),
142 cl::init(0), cl::Hidden);
143
144STATISTIC(NumMerged, "Number of globals merged");
145
146namespace {
147
148class GlobalMergeImpl {
149 const TargetMachine *TM = nullptr;
151 bool IsMachO = false;
152
153private:
154 bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals, Module &M,
155 bool isConst, unsigned AddrSpace) const;
156
157 /// Merge everything in \p Globals for which the corresponding bit
158 /// in \p GlobalSet is set.
159 bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
160 const BitVector &GlobalSet, Module &M, bool isConst,
161 unsigned AddrSpace) const;
162
163 /// Check if the given variable has been identified as must keep
164 /// \pre setMustKeepGlobalVariables must have been called on the Module that
165 /// contains GV
166 bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
167 return MustKeepGlobalVariables.count(GV);
168 }
169
170 /// Collect every variables marked as "used" or used in a landing pad
171 /// instruction for this Module.
172 void setMustKeepGlobalVariables(Module &M);
173
174 /// Collect every variables marked as "used"
176
177 /// Keep track of the GlobalVariable that must not be merged away
178 SmallSetVector<const GlobalVariable *, 16> MustKeepGlobalVariables;
179
180public:
181 GlobalMergeImpl(const TargetMachine *TM, GlobalMergeOptions Opt)
182 : TM(TM), Opt(Opt) {}
183 bool run(Module &M);
184};
185
186class GlobalMerge : public FunctionPass {
187 const TargetMachine *TM = nullptr;
189
190public:
191 static char ID; // Pass identification, replacement for typeid.
192
193 explicit GlobalMerge() : FunctionPass(ID) {
196 }
197
198 explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
199 bool OnlyOptimizeForSize, bool MergeExternalGlobals,
200 bool MergeConstantGlobals)
201 : FunctionPass(ID), TM(TM) {
202 Opt.MaxOffset = MaximalOffset;
203 Opt.SizeOnly = OnlyOptimizeForSize;
204 Opt.MergeExternal = MergeExternalGlobals;
205 Opt.MergeConstantGlobals = MergeConstantGlobals;
207 }
208
209 bool doInitialization(Module &M) override {
210 auto GetSmallDataLimit = [](Module &M) -> std::optional<uint64_t> {
211 Metadata *SDL = M.getModuleFlag("SmallDataLimit");
212 if (!SDL)
213 return std::nullopt;
214 return mdconst::extract<ConstantInt>(SDL)->getZExtValue();
215 };
216 if (GlobalMergeMinDataSize.getNumOccurrences())
218 else if (auto SDL = GetSmallDataLimit(M); SDL && *SDL > 0)
219 Opt.MinSize = *SDL + 1;
220 else
221 Opt.MinSize = 0;
222
223 GlobalMergeImpl P(TM, Opt);
224 return P.run(M);
225 }
226 bool runOnFunction(Function &F) override { return false; }
227
228 StringRef getPassName() const override { return "Merge internal globals"; }
229
230 void getAnalysisUsage(AnalysisUsage &AU) const override {
231 AU.setPreservesCFG();
233 }
234};
235
236} // end anonymous namespace
237
239 GlobalMergeImpl P(TM, Options);
240 bool Changed = P.run(M);
241 if (!Changed)
242 return PreservedAnalyses::all();
243
246 return PA;
247}
248
249char GlobalMerge::ID = 0;
250
251INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
252
253bool GlobalMergeImpl::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
254 Module &M, bool isConst,
255 unsigned AddrSpace) const {
256 auto &DL = M.getDataLayout();
257 // FIXME: Find better heuristics
259 Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
260 // We don't support scalable global variables.
261 return DL.getTypeAllocSize(GV1->getValueType()).getFixedValue() <
262 DL.getTypeAllocSize(GV2->getValueType()).getFixedValue();
263 });
264
265 // If we want to just blindly group all globals together, do so.
267 BitVector AllGlobals(Globals.size());
268 AllGlobals.set();
269 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
270 }
271
272 // If we want to be smarter, look at all uses of each global, to try to
273 // discover all sets of globals used together, and how many times each of
274 // these sets occurred.
275 //
276 // Keep this reasonably efficient, by having an append-only list of all sets
277 // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
278 // code (currently, a Function) to the set of globals seen so far that are
279 // used together in that unit (GlobalUsesByFunction).
280 //
281 // When we look at the Nth global, we know that any new set is either:
282 // - the singleton set {N}, containing this global only, or
283 // - the union of {N} and a previously-discovered set, containing some
284 // combination of the previous N-1 globals.
285 // Using that knowledge, when looking at the Nth global, we can keep:
286 // - a reference to the singleton set {N} (CurGVOnlySetIdx)
287 // - a list mapping each previous set to its union with {N} (EncounteredUGS),
288 // if it actually occurs.
289
290 // We keep track of the sets of globals used together "close enough".
291 struct UsedGlobalSet {
292 BitVector Globals;
293 unsigned UsageCount = 1;
294
295 UsedGlobalSet(size_t Size) : Globals(Size) {}
296 };
297
298 // Each set is unique in UsedGlobalSets.
299 std::vector<UsedGlobalSet> UsedGlobalSets;
300
301 // Avoid repeating the create-global-set pattern.
302 auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
303 UsedGlobalSets.emplace_back(Globals.size());
304 return UsedGlobalSets.back();
305 };
306
307 // The first set is the empty set.
308 CreateGlobalSet().UsageCount = 0;
309
310 // We define "close enough" to be "in the same function".
311 // FIXME: Grouping uses by function is way too aggressive, so we should have
312 // a better metric for distance between uses.
313 // The obvious alternative would be to group by BasicBlock, but that's in
314 // turn too conservative..
315 // Anything in between wouldn't be trivial to compute, so just stick with
316 // per-function grouping.
317
318 // The value type is an index into UsedGlobalSets.
319 // The default (0) conveniently points to the empty set.
320 DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
321
322 // Now, look at each merge-eligible global in turn.
323
324 // Keep track of the sets we already encountered to which we added the
325 // current global.
326 // Each element matches the same-index element in UsedGlobalSets.
327 // This lets us efficiently tell whether a set has already been expanded to
328 // include the current global.
329 std::vector<size_t> EncounteredUGS;
330
331 for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
332 GlobalVariable *GV = Globals[GI];
333
334 // Reset the encountered sets for this global and grow it in case we created
335 // new sets for the previous global.
336 EncounteredUGS.assign(UsedGlobalSets.size(), 0);
337
338 // We might need to create a set that only consists of the current global.
339 // Keep track of its index into UsedGlobalSets.
340 size_t CurGVOnlySetIdx = 0;
341
342 // For each global, look at all its Uses.
343 for (auto &U : GV->uses()) {
344 // This Use might be a ConstantExpr. We're interested in Instruction
345 // users, so look through ConstantExpr...
346 Use *UI, *UE;
347 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
348 if (CE->use_empty())
349 continue;
350 UI = &*CE->use_begin();
351 UE = nullptr;
352 } else if (isa<Instruction>(U.getUser())) {
353 UI = &U;
354 UE = UI->getNext();
355 } else {
356 continue;
357 }
358
359 // ...to iterate on all the instruction users of the global.
360 // Note that we iterate on Uses and not on Users to be able to getNext().
361 for (; UI != UE; UI = UI->getNext()) {
362 Instruction *I = dyn_cast<Instruction>(UI->getUser());
363 if (!I)
364 continue;
365
366 Function *ParentFn = I->getParent()->getParent();
367
368 // If we're only optimizing for size, ignore non-minsize functions.
369 if (Opt.SizeOnly && !ParentFn->hasMinSize())
370 continue;
371
372 size_t UGSIdx = GlobalUsesByFunction[ParentFn];
373
374 // If this is the first global the basic block uses, map it to the set
375 // consisting of this global only.
376 if (!UGSIdx) {
377 // If that set doesn't exist yet, create it.
378 if (!CurGVOnlySetIdx) {
379 CurGVOnlySetIdx = UsedGlobalSets.size();
380 CreateGlobalSet().Globals.set(GI);
381 } else {
382 ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
383 }
384
385 GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
386 continue;
387 }
388
389 // If we already encountered this BB, just increment the counter.
390 if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
391 ++UsedGlobalSets[UGSIdx].UsageCount;
392 continue;
393 }
394
395 // If not, the previous set wasn't actually used in this function.
396 --UsedGlobalSets[UGSIdx].UsageCount;
397
398 // If we already expanded the previous set to include this global, just
399 // reuse that expanded set.
400 if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
401 ++UsedGlobalSets[ExpandedIdx].UsageCount;
402 GlobalUsesByFunction[ParentFn] = ExpandedIdx;
403 continue;
404 }
405
406 // If not, create a new set consisting of the union of the previous set
407 // and this global. Mark it as encountered, so we can reuse it later.
408 GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
409 UsedGlobalSets.size();
410
411 UsedGlobalSet &NewUGS = CreateGlobalSet();
412 NewUGS.Globals.set(GI);
413 NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
414 }
415 }
416 }
417
418 // Now we found a bunch of sets of globals used together. We accumulated
419 // the number of times we encountered the sets (i.e., the number of blocks
420 // that use that exact set of globals).
421 //
422 // Multiply that by the size of the set to give us a crude profitability
423 // metric.
424 llvm::stable_sort(UsedGlobalSets,
425 [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
426 return UGS1.Globals.count() * UGS1.UsageCount <
427 UGS2.Globals.count() * UGS2.UsageCount;
428 });
429
430 // We can choose to merge all globals together, but ignore globals never used
431 // with another global. This catches the obviously non-profitable cases of
432 // having a single global, but is aggressive enough for any other case.
434 BitVector AllGlobals(Globals.size());
435 for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
436 if (UGS.UsageCount == 0)
437 continue;
438 if (UGS.Globals.count() > 1)
439 AllGlobals |= UGS.Globals;
440 }
441 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
442 }
443
444 // Starting from the sets with the best (=biggest) profitability, find a
445 // good combination.
446 // The ideal (and expensive) solution can only be found by trying all
447 // combinations, looking for the one with the best profitability.
448 // Don't be smart about it, and just pick the first compatible combination,
449 // starting with the sets with the best profitability.
450 BitVector PickedGlobals(Globals.size());
451 bool Changed = false;
452
453 for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
454 if (UGS.UsageCount == 0)
455 continue;
456 if (PickedGlobals.anyCommon(UGS.Globals))
457 continue;
458 PickedGlobals |= UGS.Globals;
459 // If the set only contains one global, there's no point in merging.
460 // Ignore the global for inclusion in other sets though, so keep it in
461 // PickedGlobals.
462 if (UGS.Globals.count() < 2)
463 continue;
464 Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace);
465 }
466
467 return Changed;
468}
469
470bool GlobalMergeImpl::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
471 const BitVector &GlobalSet, Module &M,
472 bool isConst, unsigned AddrSpace) const {
473 assert(Globals.size() > 1);
474
475 Type *Int32Ty = Type::getInt32Ty(M.getContext());
476 Type *Int8Ty = Type::getInt8Ty(M.getContext());
477 auto &DL = M.getDataLayout();
478
479 LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
480 << GlobalSet.find_first() << ", total of " << Globals.size()
481 << "\n");
482
483 bool Changed = false;
484 ssize_t i = GlobalSet.find_first();
485 while (i != -1) {
486 ssize_t j = 0;
487 uint64_t MergedSize = 0;
488 std::vector<Type*> Tys;
489 std::vector<Constant*> Inits;
490 std::vector<unsigned> StructIdxs;
491
492 bool HasExternal = false;
493 StringRef FirstExternalName;
494 Align MaxAlign;
495 unsigned CurIdx = 0;
496 for (j = i; j != -1; j = GlobalSet.find_next(j)) {
497 Type *Ty = Globals[j]->getValueType();
498
499 // Make sure we use the same alignment AsmPrinter would use.
500 Align Alignment = DL.getPreferredAlign(Globals[j]);
501 unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
502 MergedSize += Padding;
503 MergedSize += DL.getTypeAllocSize(Ty);
504 if (MergedSize > Opt.MaxOffset) {
505 break;
506 }
507 if (Padding) {
508 Tys.push_back(ArrayType::get(Int8Ty, Padding));
509 Inits.push_back(ConstantAggregateZero::get(Tys.back()));
510 ++CurIdx;
511 }
512 Tys.push_back(Ty);
513 Inits.push_back(Globals[j]->getInitializer());
514 StructIdxs.push_back(CurIdx++);
515
516 MaxAlign = std::max(MaxAlign, Alignment);
517
518 if (Globals[j]->hasExternalLinkage() && !HasExternal) {
519 HasExternal = true;
520 FirstExternalName = Globals[j]->getName();
521 }
522 }
523
524 // Exit early if there is only one global to merge.
525 if (Tys.size() < 2) {
526 i = j;
527 continue;
528 }
529
530 // If merged variables doesn't have external linkage, we needn't to expose
531 // the symbol after merging.
535 // Use a packed struct so we can control alignment.
536 StructType *MergedTy = StructType::get(M.getContext(), Tys, true);
537 Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
538
539 // On Darwin external linkage needs to be preserved, otherwise
540 // dsymutil cannot preserve the debug info for the merged
541 // variables. If they have external linkage, use the symbol name
542 // of the first variable merged as the suffix of global symbol
543 // name. This avoids a link-time naming conflict for the
544 // _MergedGlobals symbols.
545 Twine MergedName =
546 (IsMachO && HasExternal)
547 ? "_MergedGlobals_" + FirstExternalName
548 : "_MergedGlobals";
549 auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage;
550 auto *MergedGV = new GlobalVariable(
551 M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
553
554 MergedGV->setAlignment(MaxAlign);
555 MergedGV->setSection(Globals[i]->getSection());
556
557 LLVM_DEBUG(dbgs() << "MergedGV: " << *MergedGV << "\n");
558
559 const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
560 for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
561 GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
562 std::string Name(Globals[k]->getName());
563 GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
565 Globals[k]->getDLLStorageClass();
566
567 // Copy metadata while adjusting any debug info metadata by the original
568 // global's offset within the merged global.
569 MergedGV->copyMetadata(Globals[k],
570 MergedLayout->getElementOffset(StructIdxs[idx]));
571
572 Constant *Idx[2] = {
573 ConstantInt::get(Int32Ty, 0),
574 ConstantInt::get(Int32Ty, StructIdxs[idx]),
575 };
576 Constant *GEP =
577 ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
578 Globals[k]->replaceAllUsesWith(GEP);
579 Globals[k]->eraseFromParent();
580
581 // When the linkage is not internal we must emit an alias for the original
582 // variable name as it may be accessed from another object. On non-Mach-O
583 // we can also emit an alias for internal linkage as it's safe to do so.
584 // It's not safe on Mach-O as the alias (and thus the portion of the
585 // MergedGlobals variable) may be dead stripped at link time.
586 if (Linkage != GlobalValue::InternalLinkage || !IsMachO) {
587 GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
588 Linkage, Name, GEP, &M);
589 GA->setVisibility(Visibility);
590 GA->setDLLStorageClass(DLLStorage);
591 }
592
593 NumMerged++;
594 }
595 Changed = true;
596 i = j;
597 }
598
599 return Changed;
600}
601
602void GlobalMergeImpl::collectUsedGlobalVariables(Module &M, StringRef Name) {
603 // Extract global variables from llvm.used array
604 const GlobalVariable *GV = M.getGlobalVariable(Name);
605 if (!GV || !GV->hasInitializer()) return;
606
607 // Should be an array of 'i8*'.
608 const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
609
610 for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
611 if (const GlobalVariable *G =
612 dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
613 MustKeepGlobalVariables.insert(G);
614}
615
616void GlobalMergeImpl::setMustKeepGlobalVariables(Module &M) {
617 collectUsedGlobalVariables(M, "llvm.used");
618 collectUsedGlobalVariables(M, "llvm.compiler.used");
619
620 for (Function &F : M) {
621 for (BasicBlock &BB : F) {
622 Instruction *Pad = BB.getFirstNonPHI();
623 if (!Pad->isEHPad())
624 continue;
625
626 // Keep globals used by landingpads and catchpads.
627 for (const Use &U : Pad->operands()) {
628 if (const GlobalVariable *GV =
629 dyn_cast<GlobalVariable>(U->stripPointerCasts()))
630 MustKeepGlobalVariables.insert(GV);
631 else if (const ConstantArray *CA = dyn_cast<ConstantArray>(U->stripPointerCasts())) {
632 for (const Use &Elt : CA->operands()) {
633 if (const GlobalVariable *GV =
634 dyn_cast<GlobalVariable>(Elt->stripPointerCasts()))
635 MustKeepGlobalVariables.insert(GV);
636 }
637 }
638 }
639 }
640 }
641}
642
643bool GlobalMergeImpl::run(Module &M) {
645 return false;
646
647 IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
648
649 auto &DL = M.getDataLayout();
651 Globals, ConstGlobals, BSSGlobals;
652 bool Changed = false;
653 setMustKeepGlobalVariables(M);
654
655 LLVM_DEBUG({
656 dbgs() << "Number of GV that must be kept: " <<
657 MustKeepGlobalVariables.size() << "\n";
658 for (const GlobalVariable *KeptGV : MustKeepGlobalVariables)
659 dbgs() << "Kept: " << *KeptGV << "\n";
660 });
661 // Grab all non-const globals.
662 for (auto &GV : M.globals()) {
663 // Merge is safe for "normal" internal or external globals only
664 if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
665 continue;
666
667 // It's not safe to merge globals that may be preempted
668 if (TM && !TM->shouldAssumeDSOLocal(&GV))
669 continue;
670
671 if (!(Opt.MergeExternal && GV.hasExternalLinkage()) &&
672 !GV.hasLocalLinkage())
673 continue;
674
675 PointerType *PT = dyn_cast<PointerType>(GV.getType());
676 assert(PT && "Global variable is not a pointer!");
677
678 unsigned AddressSpace = PT->getAddressSpace();
680
681 // Ignore all 'special' globals.
682 if (GV.getName().starts_with("llvm.") || GV.getName().starts_with(".llvm."))
683 continue;
684
685 // Ignore all "required" globals:
686 if (isMustKeepGlobalVariable(&GV))
687 continue;
688
689 // Don't merge tagged globals, as each global should have its own unique
690 // memory tag at runtime. TODO(hctim): This can be relaxed: constant globals
691 // with compatible alignment and the same contents may be merged as long as
692 // the globals occupy the same number of tag granules (i.e. `size_a / 16 ==
693 // size_b / 16`).
694 if (GV.isTagged())
695 continue;
696
697 Type *Ty = GV.getValueType();
698 TypeSize AllocSize = DL.getTypeAllocSize(Ty);
699 if (AllocSize < Opt.MaxOffset && AllocSize >= Opt.MinSize) {
700 if (TM &&
702 BSSGlobals[{AddressSpace, Section}].push_back(&GV);
703 else if (GV.isConstant())
704 ConstGlobals[{AddressSpace, Section}].push_back(&GV);
705 else
706 Globals[{AddressSpace, Section}].push_back(&GV);
707 }
708 LLVM_DEBUG(dbgs() << "GV "
709 << ((DL.getTypeAllocSize(Ty) < Opt.MaxOffset)
710 ? "to merge: "
711 : "not to merge: ")
712 << GV << "\n");
713 }
714
715 for (auto &P : Globals)
716 if (P.second.size() > 1)
717 Changed |= doMerge(P.second, M, false, P.first.first);
718
719 for (auto &P : BSSGlobals)
720 if (P.second.size() > 1)
721 Changed |= doMerge(P.second, M, false, P.first.first);
722
723 if (Opt.MergeConstantGlobals)
724 for (auto &P : ConstGlobals)
725 if (P.second.size() > 1)
726 Changed |= doMerge(P.second, M, true, P.first.first);
727
728 return Changed;
729}
730
732 bool OnlyOptimizeForSize,
733 bool MergeExternalByDefault,
734 bool MergeConstantByDefault) {
735 bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
736 MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
737 bool MergeConstant = EnableGlobalMergeOnConst || MergeConstantByDefault;
738 return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal,
739 MergeConstant);
740}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
#define DEBUG_TYPE
static cl::opt< bool > GlobalMergeIgnoreSingleUse("global-merge-ignore-single-use", cl::Hidden, cl::desc("Improve global merge pass to ignore globals only used alone"), cl::init(true))
static cl::opt< bool > GlobalMergeGroupByUse("global-merge-group-by-use", cl::Hidden, cl::desc("Improve global merge pass to look at uses"), cl::init(true))
static cl::opt< bool > EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), cl::init(false))
static cl::opt< unsigned > GlobalMergeMinDataSize("global-merge-min-data-size", cl::desc("The minimum size in bytes of each global " "that should considered in merging."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden, cl::desc("Set maximum offset for global merge pass"), cl::init(0))
static cl::opt< bool > EnableGlobalMerge("enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass"), cl::init(true))
static cl::opt< cl::boolOrDefault > EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"))
Hexagon Common GEP
This defines the Use class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
This file implements a map that provides insertion order iteration.
Module.h This file contains the declarations for the Module class.
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:635
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition: BitVector.h:300
bool back() const
Return the last element in the vector.
Definition: BitVector.h:456
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition: BitVector.h:308
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1650
ConstantArray - Constant Array Declarations.
Definition: Constants.h:424
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1097
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition: Constants.h:1280
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1357
This is an important base class in LLVM.
Definition: Constant.h:42
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:550
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:118
bool hasExternalLinkage() const
Definition: GlobalValue.h:511
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:290
bool hasLocalLinkage() const
Definition: GlobalValue.h:528
bool isTagged() const
Definition: GlobalValue.h:365
void setDLLStorageClass(DLLStorageClassTypes C)
Definition: GlobalValue.h:284
DLLStorageClassTypes
Storage classes of global values for PE targets.
Definition: GlobalValue.h:73
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:294
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition: GlobalValue.h:66
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:254
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:51
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:60
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
Type * getValueType() const
Definition: GlobalValue.h:296
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool hasImplicitSection() const
Check if section name is present.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:824
void push_back(MachineInstr *MI)
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
Definition: Pass.h:119
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:571
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:600
Class to represent struct types.
Definition: DerivedTypes.h:216
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:361
static SectionKind getKindForGlobal(const GlobalObject *GO, const TargetMachine &TM)
Classify the specified global variable into a set of target independent categories embodied in Sectio...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:732
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
@ SDL
Definition: COFF.h:815
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Expected< const typename ELFT::Shdr * > getSection(typename ELFT::ShdrRange Sections, uint32_t Index)
Definition: ELF.h:533
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:2020
AddressSpace
Definition: NVPTXBaseInfo.h:21
void initializeGlobalMergePass(PassRegistry &)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
Pass * createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, bool OnlyOptimizeForSize=false, bool MergeExternalByDefault=false, bool MergeConstantByDefault=false)
GlobalMerge - This pass merges internal (by default) globals into structs to enable reuse of a base p...
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:830
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
bool SizeOnly
Whether we should try to optimize for size only.
Definition: GlobalMerge.h:37
bool MergeExternal
Whether we should merge global variables that have external linkage.
Definition: GlobalMerge.h:30
bool MergeConstantGlobals
Whether we should merge constant global variables.
Definition: GlobalMerge.h:32