LLVM 20.0.0git
GlobalMerge.cpp
Go to the documentation of this file.
1//===- GlobalMerge.cpp - Internal globals merging -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges globals with internal linkage into one. This way all the
10// globals which were merged into a biggest one can be addressed using offsets
11// from the same base pointer (no need for separate base pointer for each of the
12// global). Such a transformation can significantly reduce the register pressure
13// when many globals are involved.
14//
15// For example, consider the code which touches several global variables at
16// once:
17//
18// static int foo[N], bar[N], baz[N];
19//
20// for (i = 0; i < N; ++i) {
21// foo[i] = bar[i] * baz[i];
22// }
23//
24// On ARM the addresses of 3 arrays should be kept in the registers, thus
25// this code has quite large register pressure (loop body):
26//
27// ldr r1, [r5], #4
28// ldr r2, [r6], #4
29// mul r1, r2, r1
30// str r1, [r0], #4
31//
32// Pass converts the code to something like:
33//
34// static struct {
35// int foo[N];
36// int bar[N];
37// int baz[N];
38// } merged;
39//
40// for (i = 0; i < N; ++i) {
41// merged.foo[i] = merged.bar[i] * merged.baz[i];
42// }
43//
44// and in ARM code this becomes:
45//
46// ldr r0, [r5, #40]
47// ldr r1, [r5, #80]
48// mul r0, r1, r0
49// str r0, [r5], #4
50//
51// note that we saved 2 registers here almostly "for free".
52//
53// However, merging globals can have tradeoffs:
54// - it confuses debuggers, tools, and users
55// - it makes linker optimizations less useful (order files, LOHs, ...)
56// - it forces usage of indexed addressing (which isn't necessarily "free")
57// - it can increase register pressure when the uses are disparate enough.
58//
59// We use heuristics to discover the best global grouping we can (cf cl::opts).
60//
61// ===---------------------------------------------------------------------===//
62
64#include "llvm/ADT/BitVector.h"
65#include "llvm/ADT/DenseMap.h"
66#include "llvm/ADT/MapVector.h"
67#include "llvm/ADT/SetVector.h"
69#include "llvm/ADT/Statistic.h"
70#include "llvm/ADT/StringRef.h"
71#include "llvm/ADT/Twine.h"
72#include "llvm/CodeGen/Passes.h"
73#include "llvm/IR/BasicBlock.h"
74#include "llvm/IR/Constants.h"
75#include "llvm/IR/DataLayout.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Module.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/Use.h"
86#include "llvm/IR/User.h"
88#include "llvm/MC/SectionKind.h"
89#include "llvm/Pass.h"
92#include "llvm/Support/Debug.h"
97#include <algorithm>
98#include <cassert>
99#include <cstddef>
100#include <cstdint>
101#include <string>
102#include <vector>
103
104using namespace llvm;
105
106#define DEBUG_TYPE "global-merge"
107
108// FIXME: This is only useful as a last-resort way to disable the pass.
109static cl::opt<bool>
110EnableGlobalMerge("enable-global-merge", cl::Hidden,
111 cl::desc("Enable the global merge pass"),
112 cl::init(true));
113
115GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
116 cl::desc("Set maximum offset for global merge pass"),
117 cl::init(0));
118
120 "global-merge-group-by-use", cl::Hidden,
121 cl::desc("Improve global merge pass to look at uses"), cl::init(true));
122
124 "global-merge-all-const", cl::Hidden,
125 cl::desc("Merge all const globals without looking at uses"),
126 cl::init(false));
127
129 "global-merge-ignore-single-use", cl::Hidden,
130 cl::desc("Improve global merge pass to ignore globals only used alone"),
131 cl::init(true));
132
133static cl::opt<bool>
134EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
135 cl::desc("Enable global merge pass on constants"),
136 cl::init(false));
137
138// FIXME: this could be a transitional option, and we probably need to remove
139// it if only we are sure this optimization could always benefit all targets.
141EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
142 cl::desc("Enable global merge pass on external linkage"));
143
145 GlobalMergeMinDataSize("global-merge-min-data-size",
146 cl::desc("The minimum size in bytes of each global "
147 "that should considered in merging."),
148 cl::init(0), cl::Hidden);
149
150STATISTIC(NumMerged, "Number of globals merged");
151
152namespace {
153
154class GlobalMergeImpl {
155 const TargetMachine *TM = nullptr;
157 bool IsMachO = false;
158
159private:
160 bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals, Module &M,
161 bool isConst, unsigned AddrSpace) const;
162
163 /// Merge everything in \p Globals for which the corresponding bit
164 /// in \p GlobalSet is set.
165 bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
166 const BitVector &GlobalSet, Module &M, bool isConst,
167 unsigned AddrSpace) const;
168
169 /// Check if the given variable has been identified as must keep
170 /// \pre setMustKeepGlobalVariables must have been called on the Module that
171 /// contains GV
172 bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
173 return MustKeepGlobalVariables.count(GV);
174 }
175
176 /// Collect every variables marked as "used" or used in a landing pad
177 /// instruction for this Module.
178 void setMustKeepGlobalVariables(Module &M);
179
180 /// Collect every variables marked as "used"
182
183 /// Keep track of the GlobalVariable that must not be merged away
184 SmallSetVector<const GlobalVariable *, 16> MustKeepGlobalVariables;
185
186public:
187 GlobalMergeImpl(const TargetMachine *TM, GlobalMergeOptions Opt)
188 : TM(TM), Opt(Opt) {}
189 bool run(Module &M);
190};
191
192class GlobalMerge : public FunctionPass {
193 const TargetMachine *TM = nullptr;
195
196public:
197 static char ID; // Pass identification, replacement for typeid.
198
199 explicit GlobalMerge() : FunctionPass(ID) {
202 }
203
204 explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
205 bool OnlyOptimizeForSize, bool MergeExternalGlobals,
206 bool MergeConstantGlobals, bool MergeConstAggressive)
207 : FunctionPass(ID), TM(TM) {
208 Opt.MaxOffset = MaximalOffset;
209 Opt.SizeOnly = OnlyOptimizeForSize;
210 Opt.MergeExternal = MergeExternalGlobals;
211 Opt.MergeConstantGlobals = MergeConstantGlobals;
212 Opt.MergeConstAggressive = MergeConstAggressive;
214 }
215
216 bool doInitialization(Module &M) override {
217 auto GetSmallDataLimit = [](Module &M) -> std::optional<uint64_t> {
218 Metadata *SDL = M.getModuleFlag("SmallDataLimit");
219 if (!SDL)
220 return std::nullopt;
221 return mdconst::extract<ConstantInt>(SDL)->getZExtValue();
222 };
223 if (GlobalMergeMinDataSize.getNumOccurrences())
225 else if (auto SDL = GetSmallDataLimit(M); SDL && *SDL > 0)
226 Opt.MinSize = *SDL + 1;
227 else
228 Opt.MinSize = 0;
229
230 GlobalMergeImpl P(TM, Opt);
231 return P.run(M);
232 }
233 bool runOnFunction(Function &F) override { return false; }
234
235 StringRef getPassName() const override { return "Merge internal globals"; }
236
237 void getAnalysisUsage(AnalysisUsage &AU) const override {
238 AU.setPreservesCFG();
240 }
241};
242
243} // end anonymous namespace
244
246 GlobalMergeImpl P(TM, Options);
247 bool Changed = P.run(M);
248 if (!Changed)
249 return PreservedAnalyses::all();
250
253 return PA;
254}
255
256char GlobalMerge::ID = 0;
257
258INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
259
260bool GlobalMergeImpl::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
261 Module &M, bool isConst,
262 unsigned AddrSpace) const {
263 auto &DL = M.getDataLayout();
264 // FIXME: Find better heuristics
266 Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
267 // We don't support scalable global variables.
268 return DL.getTypeAllocSize(GV1->getValueType()).getFixedValue() <
269 DL.getTypeAllocSize(GV2->getValueType()).getFixedValue();
270 });
271
272 // If we want to just blindly group all globals together, do so.
273 if (!GlobalMergeGroupByUse || (Opt.MergeConstAggressive && isConst)) {
274 BitVector AllGlobals(Globals.size());
275 AllGlobals.set();
276 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
277 }
278
279 // If we want to be smarter, look at all uses of each global, to try to
280 // discover all sets of globals used together, and how many times each of
281 // these sets occurred.
282 //
283 // Keep this reasonably efficient, by having an append-only list of all sets
284 // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
285 // code (currently, a Function) to the set of globals seen so far that are
286 // used together in that unit (GlobalUsesByFunction).
287 //
288 // When we look at the Nth global, we know that any new set is either:
289 // - the singleton set {N}, containing this global only, or
290 // - the union of {N} and a previously-discovered set, containing some
291 // combination of the previous N-1 globals.
292 // Using that knowledge, when looking at the Nth global, we can keep:
293 // - a reference to the singleton set {N} (CurGVOnlySetIdx)
294 // - a list mapping each previous set to its union with {N} (EncounteredUGS),
295 // if it actually occurs.
296
297 // We keep track of the sets of globals used together "close enough".
298 struct UsedGlobalSet {
299 BitVector Globals;
300 unsigned UsageCount = 1;
301
302 UsedGlobalSet(size_t Size) : Globals(Size) {}
303 };
304
305 // Each set is unique in UsedGlobalSets.
306 std::vector<UsedGlobalSet> UsedGlobalSets;
307
308 // Avoid repeating the create-global-set pattern.
309 auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
310 UsedGlobalSets.emplace_back(Globals.size());
311 return UsedGlobalSets.back();
312 };
313
314 // The first set is the empty set.
315 CreateGlobalSet().UsageCount = 0;
316
317 // We define "close enough" to be "in the same function".
318 // FIXME: Grouping uses by function is way too aggressive, so we should have
319 // a better metric for distance between uses.
320 // The obvious alternative would be to group by BasicBlock, but that's in
321 // turn too conservative..
322 // Anything in between wouldn't be trivial to compute, so just stick with
323 // per-function grouping.
324
325 // The value type is an index into UsedGlobalSets.
326 // The default (0) conveniently points to the empty set.
327 DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
328
329 // Now, look at each merge-eligible global in turn.
330
331 // Keep track of the sets we already encountered to which we added the
332 // current global.
333 // Each element matches the same-index element in UsedGlobalSets.
334 // This lets us efficiently tell whether a set has already been expanded to
335 // include the current global.
336 std::vector<size_t> EncounteredUGS;
337
338 for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
339 GlobalVariable *GV = Globals[GI];
340
341 // Reset the encountered sets for this global and grow it in case we created
342 // new sets for the previous global.
343 EncounteredUGS.assign(UsedGlobalSets.size(), 0);
344
345 // We might need to create a set that only consists of the current global.
346 // Keep track of its index into UsedGlobalSets.
347 size_t CurGVOnlySetIdx = 0;
348
349 // For each global, look at all its Uses.
350 for (auto &U : GV->uses()) {
351 // This Use might be a ConstantExpr. We're interested in Instruction
352 // users, so look through ConstantExpr...
353 Use *UI, *UE;
354 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
355 if (CE->use_empty())
356 continue;
357 UI = &*CE->use_begin();
358 UE = nullptr;
359 } else if (isa<Instruction>(U.getUser())) {
360 UI = &U;
361 UE = UI->getNext();
362 } else {
363 continue;
364 }
365
366 // ...to iterate on all the instruction users of the global.
367 // Note that we iterate on Uses and not on Users to be able to getNext().
368 for (; UI != UE; UI = UI->getNext()) {
369 Instruction *I = dyn_cast<Instruction>(UI->getUser());
370 if (!I)
371 continue;
372
373 Function *ParentFn = I->getParent()->getParent();
374
375 // If we're only optimizing for size, ignore non-minsize functions.
376 if (Opt.SizeOnly && !ParentFn->hasMinSize())
377 continue;
378
379 size_t UGSIdx = GlobalUsesByFunction[ParentFn];
380
381 // If this is the first global the basic block uses, map it to the set
382 // consisting of this global only.
383 if (!UGSIdx) {
384 // If that set doesn't exist yet, create it.
385 if (!CurGVOnlySetIdx) {
386 CurGVOnlySetIdx = UsedGlobalSets.size();
387 CreateGlobalSet().Globals.set(GI);
388 } else {
389 ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
390 }
391
392 GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
393 continue;
394 }
395
396 // If we already encountered this BB, just increment the counter.
397 if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
398 ++UsedGlobalSets[UGSIdx].UsageCount;
399 continue;
400 }
401
402 // If not, the previous set wasn't actually used in this function.
403 --UsedGlobalSets[UGSIdx].UsageCount;
404
405 // If we already expanded the previous set to include this global, just
406 // reuse that expanded set.
407 if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
408 ++UsedGlobalSets[ExpandedIdx].UsageCount;
409 GlobalUsesByFunction[ParentFn] = ExpandedIdx;
410 continue;
411 }
412
413 // If not, create a new set consisting of the union of the previous set
414 // and this global. Mark it as encountered, so we can reuse it later.
415 GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
416 UsedGlobalSets.size();
417
418 UsedGlobalSet &NewUGS = CreateGlobalSet();
419 NewUGS.Globals.set(GI);
420 NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
421 }
422 }
423 }
424
425 // Now we found a bunch of sets of globals used together. We accumulated
426 // the number of times we encountered the sets (i.e., the number of blocks
427 // that use that exact set of globals).
428 //
429 // Multiply that by the size of the set to give us a crude profitability
430 // metric.
431 llvm::stable_sort(UsedGlobalSets,
432 [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
433 return UGS1.Globals.count() * UGS1.UsageCount <
434 UGS2.Globals.count() * UGS2.UsageCount;
435 });
436
437 // We can choose to merge all globals together, but ignore globals never used
438 // with another global. This catches the obviously non-profitable cases of
439 // having a single global, but is aggressive enough for any other case.
441 BitVector AllGlobals(Globals.size());
442 for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
443 if (UGS.UsageCount == 0)
444 continue;
445 if (UGS.Globals.count() > 1)
446 AllGlobals |= UGS.Globals;
447 }
448 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
449 }
450
451 // Starting from the sets with the best (=biggest) profitability, find a
452 // good combination.
453 // The ideal (and expensive) solution can only be found by trying all
454 // combinations, looking for the one with the best profitability.
455 // Don't be smart about it, and just pick the first compatible combination,
456 // starting with the sets with the best profitability.
457 BitVector PickedGlobals(Globals.size());
458 bool Changed = false;
459
460 for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
461 if (UGS.UsageCount == 0)
462 continue;
463 if (PickedGlobals.anyCommon(UGS.Globals))
464 continue;
465 PickedGlobals |= UGS.Globals;
466 // If the set only contains one global, there's no point in merging.
467 // Ignore the global for inclusion in other sets though, so keep it in
468 // PickedGlobals.
469 if (UGS.Globals.count() < 2)
470 continue;
471 Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace);
472 }
473
474 return Changed;
475}
476
477bool GlobalMergeImpl::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
478 const BitVector &GlobalSet, Module &M,
479 bool isConst, unsigned AddrSpace) const {
480 assert(Globals.size() > 1);
481
482 Type *Int32Ty = Type::getInt32Ty(M.getContext());
483 Type *Int8Ty = Type::getInt8Ty(M.getContext());
484 auto &DL = M.getDataLayout();
485
486 LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
487 << GlobalSet.find_first() << ", total of " << Globals.size()
488 << "\n");
489
490 bool Changed = false;
491 ssize_t i = GlobalSet.find_first();
492 while (i != -1) {
493 ssize_t j = 0;
494 uint64_t MergedSize = 0;
495 std::vector<Type*> Tys;
496 std::vector<Constant*> Inits;
497 std::vector<unsigned> StructIdxs;
498
499 bool HasExternal = false;
500 StringRef FirstExternalName;
501 Align MaxAlign;
502 unsigned CurIdx = 0;
503 for (j = i; j != -1; j = GlobalSet.find_next(j)) {
504 Type *Ty = Globals[j]->getValueType();
505
506 // Make sure we use the same alignment AsmPrinter would use.
507 Align Alignment = DL.getPreferredAlign(Globals[j]);
508 unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
509 MergedSize += Padding;
510 MergedSize += DL.getTypeAllocSize(Ty);
511 if (MergedSize > Opt.MaxOffset) {
512 break;
513 }
514 if (Padding) {
515 Tys.push_back(ArrayType::get(Int8Ty, Padding));
516 Inits.push_back(ConstantAggregateZero::get(Tys.back()));
517 ++CurIdx;
518 }
519 Tys.push_back(Ty);
520 Inits.push_back(Globals[j]->getInitializer());
521 StructIdxs.push_back(CurIdx++);
522
523 MaxAlign = std::max(MaxAlign, Alignment);
524
525 if (Globals[j]->hasExternalLinkage() && !HasExternal) {
526 HasExternal = true;
527 FirstExternalName = Globals[j]->getName();
528 }
529 }
530
531 // Exit early if there is only one global to merge.
532 if (Tys.size() < 2) {
533 i = j;
534 continue;
535 }
536
537 // If merged variables doesn't have external linkage, we needn't to expose
538 // the symbol after merging.
542 // Use a packed struct so we can control alignment.
543 StructType *MergedTy = StructType::get(M.getContext(), Tys, true);
544 Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
545
546 // On Darwin external linkage needs to be preserved, otherwise
547 // dsymutil cannot preserve the debug info for the merged
548 // variables. If they have external linkage, use the symbol name
549 // of the first variable merged as the suffix of global symbol
550 // name. This avoids a link-time naming conflict for the
551 // _MergedGlobals symbols.
552 Twine MergedName =
553 (IsMachO && HasExternal)
554 ? "_MergedGlobals_" + FirstExternalName
555 : "_MergedGlobals";
556 auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage;
557 auto *MergedGV = new GlobalVariable(
558 M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
560
561 MergedGV->setAlignment(MaxAlign);
562 MergedGV->setSection(Globals[i]->getSection());
563
564 LLVM_DEBUG(dbgs() << "MergedGV: " << *MergedGV << "\n");
565
566 const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
567 for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
568 GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
569 std::string Name(Globals[k]->getName());
570 GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
572 Globals[k]->getDLLStorageClass();
573
574 // Copy metadata while adjusting any debug info metadata by the original
575 // global's offset within the merged global.
576 MergedGV->copyMetadata(Globals[k],
577 MergedLayout->getElementOffset(StructIdxs[idx]));
578
579 Constant *Idx[2] = {
580 ConstantInt::get(Int32Ty, 0),
581 ConstantInt::get(Int32Ty, StructIdxs[idx]),
582 };
583 Constant *GEP =
584 ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
585 Globals[k]->replaceAllUsesWith(GEP);
586 Globals[k]->eraseFromParent();
587
588 // Emit an alias for the original variable name. This is necessary for an
589 // external symbol, as it may be accessed from another object. For
590 // internal symbols, it's not strictly required, but it's useful.
591 //
592 // This _should_ also work on Mach-O ever since '.alt_entry' support was
593 // added in 2016. Unfortunately, there's a bug in ld-prime (present at
594 // least from Xcode 15.0 through Xcode 16.0), in which -dead_strip doesn't
595 // always honor alt_entry. To workaround this issue, we don't emit aliases
596 // on Mach-O. Except, we _must_ do so for external symbols. That means
597 // MergeExternal is broken with that linker. (That option is currently off
598 // by default on MachO).
599 if (!IsMachO || Linkage == GlobalValue::ExternalLinkage) {
600 GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
601 Linkage, Name, GEP, &M);
602 GA->setVisibility(Visibility);
603 GA->setDLLStorageClass(DLLStorage);
604 }
605
606 NumMerged++;
607 }
608 Changed = true;
609 i = j;
610 }
611
612 return Changed;
613}
614
615void GlobalMergeImpl::collectUsedGlobalVariables(Module &M, StringRef Name) {
616 // Extract global variables from llvm.used array
617 const GlobalVariable *GV = M.getGlobalVariable(Name);
618 if (!GV || !GV->hasInitializer()) return;
619
620 // Should be an array of 'i8*'.
621 const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
622
623 for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
624 if (const GlobalVariable *G =
625 dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
626 MustKeepGlobalVariables.insert(G);
627}
628
629void GlobalMergeImpl::setMustKeepGlobalVariables(Module &M) {
630 collectUsedGlobalVariables(M, "llvm.used");
631 collectUsedGlobalVariables(M, "llvm.compiler.used");
632
633 for (Function &F : M) {
634 for (BasicBlock &BB : F) {
635 Instruction *Pad = BB.getFirstNonPHI();
636 auto *II = dyn_cast<IntrinsicInst>(Pad);
637 if (!Pad->isEHPad() &&
638 !(II && II->getIntrinsicID() == Intrinsic::eh_typeid_for))
639 continue;
640
641 // Keep globals used by landingpads, catchpads,
642 // or intrinsics that require a plain global.
643 for (const Use &U : Pad->operands()) {
644 if (const GlobalVariable *GV =
645 dyn_cast<GlobalVariable>(U->stripPointerCasts()))
646 MustKeepGlobalVariables.insert(GV);
647 else if (const ConstantArray *CA = dyn_cast<ConstantArray>(U->stripPointerCasts())) {
648 for (const Use &Elt : CA->operands()) {
649 if (const GlobalVariable *GV =
650 dyn_cast<GlobalVariable>(Elt->stripPointerCasts()))
651 MustKeepGlobalVariables.insert(GV);
652 }
653 }
654 }
655 }
656 }
657}
658
659// This function returns true if the given data Section name has custom
660// subsection-splitting semantics in Mach-O (such as splitting by a fixed size)
661//
662// See also ObjFile::parseSections and getRecordSize in lld/MachO/InputFiles.cpp
663static bool isSpecialMachOSection(StringRef Section) {
664 // Uses starts_with, since section attributes can appear at the end of the
665 // name.
666 return Section.starts_with("__DATA,__cfstring") ||
667 Section.starts_with("__DATA,__objc_classrefs") ||
668 Section.starts_with("__DATA,__objc_selrefs");
669}
670
671bool GlobalMergeImpl::run(Module &M) {
673 return false;
674
675 IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
676
677 auto &DL = M.getDataLayout();
679 Globals, ConstGlobals, BSSGlobals;
680 bool Changed = false;
681 setMustKeepGlobalVariables(M);
682
683 LLVM_DEBUG({
684 dbgs() << "Number of GV that must be kept: " <<
685 MustKeepGlobalVariables.size() << "\n";
686 for (const GlobalVariable *KeptGV : MustKeepGlobalVariables)
687 dbgs() << "Kept: " << *KeptGV << "\n";
688 });
689 // Grab all non-const globals.
690 for (auto &GV : M.globals()) {
691 // Merge is safe for "normal" internal or external globals only
692 if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
693 continue;
694
695 // It's not safe to merge globals that may be preempted
696 if (TM && !TM->shouldAssumeDSOLocal(&GV))
697 continue;
698
699 if (!(Opt.MergeExternal && GV.hasExternalLinkage()) &&
700 !GV.hasLocalLinkage())
701 continue;
702
703 PointerType *PT = dyn_cast<PointerType>(GV.getType());
704 assert(PT && "Global variable is not a pointer!");
705
706 unsigned AddressSpace = PT->getAddressSpace();
708
709 // On Mach-O, some section names have special semantics. Don't merge these.
710 if (IsMachO && isSpecialMachOSection(Section))
711 continue;
712
713 // Ignore all 'special' globals.
714 if (GV.getName().starts_with("llvm.") || GV.getName().starts_with(".llvm."))
715 continue;
716
717 // Ignore all "required" globals:
718 if (isMustKeepGlobalVariable(&GV))
719 continue;
720
721 // Don't merge tagged globals, as each global should have its own unique
722 // memory tag at runtime. TODO(hctim): This can be relaxed: constant globals
723 // with compatible alignment and the same contents may be merged as long as
724 // the globals occupy the same number of tag granules (i.e. `size_a / 16 ==
725 // size_b / 16`).
726 if (GV.isTagged())
727 continue;
728
729 Type *Ty = GV.getValueType();
730 TypeSize AllocSize = DL.getTypeAllocSize(Ty);
731 if (AllocSize < Opt.MaxOffset && AllocSize >= Opt.MinSize) {
732 if (TM &&
734 BSSGlobals[{AddressSpace, Section}].push_back(&GV);
735 else if (GV.isConstant())
736 ConstGlobals[{AddressSpace, Section}].push_back(&GV);
737 else
738 Globals[{AddressSpace, Section}].push_back(&GV);
739 }
740 LLVM_DEBUG(dbgs() << "GV "
741 << ((DL.getTypeAllocSize(Ty) < Opt.MaxOffset)
742 ? "to merge: "
743 : "not to merge: ")
744 << GV << "\n");
745 }
746
747 for (auto &P : Globals)
748 if (P.second.size() > 1)
749 Changed |= doMerge(P.second, M, false, P.first.first);
750
751 for (auto &P : BSSGlobals)
752 if (P.second.size() > 1)
753 Changed |= doMerge(P.second, M, false, P.first.first);
754
755 if (Opt.MergeConstantGlobals)
756 for (auto &P : ConstGlobals)
757 if (P.second.size() > 1)
758 Changed |= doMerge(P.second, M, true, P.first.first);
759
760 return Changed;
761}
762
764 bool OnlyOptimizeForSize,
765 bool MergeExternalByDefault,
766 bool MergeConstantByDefault,
767 bool MergeConstAggressiveByDefault) {
768 bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
769 MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
770 bool MergeConstant = EnableGlobalMergeOnConst || MergeConstantByDefault;
771 bool MergeConstAggressive = GlobalMergeAllConst.getNumOccurrences() > 0
773 : MergeConstAggressiveByDefault;
774 return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal,
775 MergeConstant, MergeConstAggressive);
776}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
std::string Name
uint64_t Size
#define DEBUG_TYPE
static cl::opt< bool > GlobalMergeIgnoreSingleUse("global-merge-ignore-single-use", cl::Hidden, cl::desc("Improve global merge pass to ignore globals only used alone"), cl::init(true))
static cl::opt< bool > GlobalMergeGroupByUse("global-merge-group-by-use", cl::Hidden, cl::desc("Improve global merge pass to look at uses"), cl::init(true))
static bool isSpecialMachOSection(StringRef Section)
static cl::opt< bool > GlobalMergeAllConst("global-merge-all-const", cl::Hidden, cl::desc("Merge all const globals without looking at uses"), cl::init(false))
static cl::opt< bool > EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), cl::init(false))
static cl::opt< unsigned > GlobalMergeMinDataSize("global-merge-min-data-size", cl::desc("The minimum size in bytes of each global " "that should considered in merging."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden, cl::desc("Set maximum offset for global merge pass"), cl::init(0))
static cl::opt< bool > EnableGlobalMerge("enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass"), cl::init(true))
static cl::opt< cl::boolOrDefault > EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"))
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
This defines the Use class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
This file implements a map that provides insertion order iteration.
uint64_t IntrinsicInst * II
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition: BitVector.h:300
bool back() const
Return the last element in the vector.
Definition: BitVector.h:456
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition: BitVector.h:308
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1672
ConstantArray - Constant Array Declarations.
Definition: Constants.h:427
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1108
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition: Constants.h:1294
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1378
This is an important base class in LLVM.
Definition: Constant.h:42
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:557
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:117
bool hasExternalLinkage() const
Definition: GlobalValue.h:511
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:296
bool hasLocalLinkage() const
Definition: GlobalValue.h:528
bool isTagged() const
Definition: GlobalValue.h:365
void setDLLStorageClass(DLLStorageClassTypes C)
Definition: GlobalValue.h:284
DLLStorageClassTypes
Storage classes of global values for PE targets.
Definition: GlobalValue.h:73
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:294
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition: GlobalValue.h:66
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:254
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:51
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:60
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
Type * getValueType() const
Definition: GlobalValue.h:296
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool hasImplicitSection() const
Check if section name is present.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:833
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
Definition: Pass.h:119
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:567
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:596
Class to represent struct types.
Definition: DerivedTypes.h:218
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
static SectionKind getKindForGlobal(const GlobalObject *GO, const TargetMachine &TM)
Classify the specified global variable into a set of target independent categories embodied in Sectio...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:743
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:288
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
@ SDL
Definition: COFF.h:833
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
ID ArrayRef< Type * > Tys
Definition: Intrinsics.h:102
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Expected< const typename ELFT::Shdr * > getSection(typename ELFT::ShdrRange Sections, uint32_t Index)
Definition: ELF.h:534
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
Pass * createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, bool OnlyOptimizeForSize=false, bool MergeExternalByDefault=false, bool MergeConstantByDefault=false, bool MergeConstAggressiveByDefault=false)
GlobalMerge - This pass merges internal (by default) globals into structs to enable reuse of a base p...
void initializeGlobalMergePass(PassRegistry &)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:865
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
bool MergeConstAggressive
Whether we should merge constant global variables aggressively without looking at use.
Definition: GlobalMerge.h:35
bool SizeOnly
Whether we should try to optimize for size only.
Definition: GlobalMerge.h:40
bool MergeExternal
Whether we should merge global variables that have external linkage.
Definition: GlobalMerge.h:30
bool MergeConstantGlobals
Whether we should merge constant global variables.
Definition: GlobalMerge.h:32