LLVM 23.0.0git
GlobalMerge.cpp
Go to the documentation of this file.
1//===- GlobalMerge.cpp - Internal globals merging -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges globals with internal linkage into one. This way all the
10// globals which were merged into a biggest one can be addressed using offsets
11// from the same base pointer (no need for separate base pointer for each of the
12// global). Such a transformation can significantly reduce the register pressure
13// when many globals are involved.
14//
15// For example, consider the code which touches several global variables at
16// once:
17//
18// static int foo[N], bar[N], baz[N];
19//
20// for (i = 0; i < N; ++i) {
21// foo[i] = bar[i] * baz[i];
22// }
23//
24// On ARM the addresses of 3 arrays should be kept in the registers, thus
25// this code has quite large register pressure (loop body):
26//
27// ldr r1, [r5], #4
28// ldr r2, [r6], #4
29// mul r1, r2, r1
30// str r1, [r0], #4
31//
32// Pass converts the code to something like:
33//
34// static struct {
35// int foo[N];
36// int bar[N];
37// int baz[N];
38// } merged;
39//
40// for (i = 0; i < N; ++i) {
41// merged.foo[i] = merged.bar[i] * merged.baz[i];
42// }
43//
44// and in ARM code this becomes:
45//
46// ldr r0, [r5, #40]
47// ldr r1, [r5, #80]
48// mul r0, r1, r0
49// str r0, [r5], #4
50//
51// note that we saved 2 registers here almostly "for free".
52//
53// However, merging globals can have tradeoffs:
54// - it confuses debuggers, tools, and users
55// - it makes linker optimizations less useful (order files, LOHs, ...)
56// - it forces usage of indexed addressing (which isn't necessarily "free")
57// - it can increase register pressure when the uses are disparate enough.
58//
59// We use heuristics to discover the best global grouping we can (cf cl::opts).
60//
61// ===---------------------------------------------------------------------===//
62
64#include "llvm/ADT/BitVector.h"
65#include "llvm/ADT/DenseMap.h"
66#include "llvm/ADT/MapVector.h"
67#include "llvm/ADT/SetVector.h"
69#include "llvm/ADT/Statistic.h"
70#include "llvm/ADT/StringRef.h"
71#include "llvm/ADT/Twine.h"
72#include "llvm/CodeGen/Passes.h"
73#include "llvm/IR/BasicBlock.h"
74#include "llvm/IR/Constants.h"
75#include "llvm/IR/DataLayout.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/Instruction.h"
83#include "llvm/IR/LLVMContext.h"
84#include "llvm/IR/Module.h"
85#include "llvm/IR/Type.h"
86#include "llvm/IR/Use.h"
87#include "llvm/IR/User.h"
89#include "llvm/MC/SectionKind.h"
90#include "llvm/Pass.h"
93#include "llvm/Support/Debug.h"
99#include <algorithm>
100#include <cassert>
101#include <cstddef>
102#include <cstdint>
103#include <string>
104#include <vector>
105
106using namespace llvm;
107
108#define DEBUG_TYPE "global-merge"
109
110// FIXME: This is only useful as a last-resort way to disable the pass.
111static cl::opt<bool>
112EnableGlobalMerge("enable-global-merge", cl::Hidden,
113 cl::desc("Enable the global merge pass"),
114 cl::init(true));
115
117GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
118 cl::desc("Set maximum offset for global merge pass"),
119 cl::init(0));
120
122 "global-merge-group-by-use", cl::Hidden,
123 cl::desc("Improve global merge pass to look at uses"), cl::init(true));
124
126 "global-merge-all-const", cl::Hidden,
127 cl::desc("Merge all const globals without looking at uses"),
128 cl::init(false));
129
131 "global-merge-ignore-single-use", cl::Hidden,
132 cl::desc("Improve global merge pass to ignore globals only used alone"),
133 cl::init(true));
134
135static cl::opt<bool>
136EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
137 cl::desc("Enable global merge pass on constants"),
138 cl::init(false));
139
140// FIXME: this could be a transitional option, and we probably need to remove
141// it if only we are sure this optimization could always benefit all targets.
143EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
144 cl::desc("Enable global merge pass on external linkage"));
145
147 GlobalMergeMinDataSize("global-merge-min-data-size",
148 cl::desc("The minimum size in bytes of each global "
149 "that should considered in merging."),
150 cl::init(0), cl::Hidden);
151
152STATISTIC(NumMerged, "Number of globals merged");
153
154namespace {
155
156class GlobalMergeImpl {
157 const TargetMachine *TM = nullptr;
159 bool IsMachO = false;
160
161private:
162 bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals, Module &M,
163 bool isConst, unsigned AddrSpace) const;
164
165 /// Merge everything in \p Globals for which the corresponding bit
166 /// in \p GlobalSet is set.
167 bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
168 const BitVector &GlobalSet, Module &M, bool isConst,
169 unsigned AddrSpace) const;
170
171 /// Check if the given variable has been identified as must keep
172 /// \pre setMustKeepGlobalVariables must have been called on the Module that
173 /// contains GV
174 bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
175 return MustKeepGlobalVariables.count(GV);
176 }
177
178 /// Collect every variables marked as "used" or used in a landing pad
179 /// instruction for this Module.
180 void setMustKeepGlobalVariables(Module &M);
181
182 /// Collect every variables marked as "used"
183 void collectUsedGlobalVariables(Module &M, StringRef Name);
184
185 /// Keep track of the GlobalVariable that must not be merged away
186 SmallSetVector<const GlobalVariable *, 16> MustKeepGlobalVariables;
187
188public:
189 GlobalMergeImpl(const TargetMachine *TM, GlobalMergeOptions Opt)
190 : TM(TM), Opt(Opt) {}
191 bool run(Module &M);
192};
193
194class GlobalMerge : public FunctionPass {
195 const TargetMachine *TM = nullptr;
196 GlobalMergeOptions Opt;
197
198public:
199 static char ID; // Pass identification, replacement for typeid.
200
201 explicit GlobalMerge() : FunctionPass(ID) {
202 Opt.MaxOffset = GlobalMergeMaxOffset;
203 Opt.MergeConstantGlobals = EnableGlobalMergeOnConst;
204 Opt.MergeConstAggressive = GlobalMergeAllConst;
205 }
206
207 explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
208 bool OnlyOptimizeForSize, bool MergeExternalGlobals,
209 bool MergeConstantGlobals, bool MergeConstAggressive)
210 : FunctionPass(ID), TM(TM) {
211 Opt.MaxOffset = MaximalOffset;
212 Opt.SizeOnly = OnlyOptimizeForSize;
213 Opt.MergeExternal = MergeExternalGlobals;
214 Opt.MergeConstantGlobals = MergeConstantGlobals;
215 Opt.MergeConstAggressive = MergeConstAggressive;
216 }
217
218 bool doInitialization(Module &M) override {
219 auto GetSmallDataLimit = [](Module &M) -> std::optional<uint64_t> {
220 Metadata *SDL = M.getModuleFlag("SmallDataLimit");
221 if (!SDL)
222 return std::nullopt;
223 return mdconst::extract<ConstantInt>(SDL)->getZExtValue();
224 };
225 if (GlobalMergeMinDataSize.getNumOccurrences())
226 Opt.MinSize = GlobalMergeMinDataSize;
227 else if (auto SDL = GetSmallDataLimit(M); SDL && *SDL > 0)
228 Opt.MinSize = *SDL + 1;
229 else
230 Opt.MinSize = 0;
231
232 GlobalMergeImpl P(TM, Opt);
233 return P.run(M);
234 }
235 bool runOnFunction(Function &F) override { return false; }
236
237 StringRef getPassName() const override { return "Merge internal globals"; }
238
239 void getAnalysisUsage(AnalysisUsage &AU) const override {
240 AU.setPreservesCFG();
241 FunctionPass::getAnalysisUsage(AU);
242 }
243};
244
245} // end anonymous namespace
246
248 GlobalMergeImpl P(TM, Options);
249 bool Changed = P.run(M);
250 if (!Changed)
251 return PreservedAnalyses::all();
252
255 return PA;
256}
257
258char GlobalMerge::ID = 0;
259
260INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
261
262bool GlobalMergeImpl::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
263 Module &M, bool isConst,
264 unsigned AddrSpace) const {
265 auto &DL = M.getDataLayout();
266 // FIXME: Find better heuristics
268 Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
269 // We don't support scalable global variables.
270 return GV1->getGlobalSize(DL) < GV2->getGlobalSize(DL);
271 });
272
273 // If we want to just blindly group all globals together, do so.
274 if (!GlobalMergeGroupByUse || (Opt.MergeConstAggressive && isConst)) {
275 BitVector AllGlobals(Globals.size(), true);
276 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
277 }
278
279 // If we want to be smarter, look at all uses of each global, to try to
280 // discover all sets of globals used together, and how many times each of
281 // these sets occurred.
282 //
283 // Keep this reasonably efficient, by having an append-only list of all sets
284 // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
285 // code (currently, a Function) to the set of globals seen so far that are
286 // used together in that unit (GlobalUsesByFunction).
287 //
288 // When we look at the Nth global, we know that any new set is either:
289 // - the singleton set {N}, containing this global only, or
290 // - the union of {N} and a previously-discovered set, containing some
291 // combination of the previous N-1 globals.
292 // Using that knowledge, when looking at the Nth global, we can keep:
293 // - a reference to the singleton set {N} (CurGVOnlySetIdx)
294 // - a list mapping each previous set to its union with {N} (EncounteredUGS),
295 // if it actually occurs.
296
297 // We keep track of the sets of globals used together "close enough".
298 struct UsedGlobalSet {
299 BitVector Globals;
300 unsigned UsageCount = 1;
301
302 UsedGlobalSet(size_t Size) : Globals(Size) {}
303 };
304
305 // Each set is unique in UsedGlobalSets.
306 std::vector<UsedGlobalSet> UsedGlobalSets;
307
308 // Avoid repeating the create-global-set pattern.
309 auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
310 UsedGlobalSets.emplace_back(Globals.size());
311 return UsedGlobalSets.back();
312 };
313
314 // The first set is the empty set.
315 CreateGlobalSet().UsageCount = 0;
316
317 // We define "close enough" to be "in the same function".
318 // FIXME: Grouping uses by function is way too aggressive, so we should have
319 // a better metric for distance between uses.
320 // The obvious alternative would be to group by BasicBlock, but that's in
321 // turn too conservative..
322 // Anything in between wouldn't be trivial to compute, so just stick with
323 // per-function grouping.
324
325 // The value type is an index into UsedGlobalSets.
326 // The default (0) conveniently points to the empty set.
327 DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
328
329 // Now, look at each merge-eligible global in turn.
330
331 // Keep track of the sets we already encountered to which we added the
332 // current global.
333 // Each element matches the same-index element in UsedGlobalSets.
334 // This lets us efficiently tell whether a set has already been expanded to
335 // include the current global.
336 std::vector<size_t> EncounteredUGS;
337
338 for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
339 GlobalVariable *GV = Globals[GI];
340
341 // Reset the encountered sets for this global and grow it in case we created
342 // new sets for the previous global.
343 EncounteredUGS.assign(UsedGlobalSets.size(), 0);
344
345 // We might need to create a set that only consists of the current global.
346 // Keep track of its index into UsedGlobalSets.
347 size_t CurGVOnlySetIdx = 0;
348
349 // For each global, look at all its Uses.
350 for (auto &U : GV->uses()) {
351 // This Use might be a ConstantExpr. We're interested in Instruction
352 // users, so look through ConstantExpr...
353 Use *UI, *UE;
354 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
355 if (CE->use_empty())
356 continue;
357 UI = &*CE->use_begin();
358 UE = nullptr;
359 } else if (isa<Instruction>(U.getUser())) {
360 UI = &U;
361 UE = UI->getNext();
362 } else {
363 continue;
364 }
365
366 // ...to iterate on all the instruction users of the global.
367 // Note that we iterate on Uses and not on Users to be able to getNext().
368 for (; UI != UE; UI = UI->getNext()) {
369 Instruction *I = dyn_cast<Instruction>(UI->getUser());
370 if (!I)
371 continue;
372
373 Function *ParentFn = I->getParent()->getParent();
374
375 // If we're only optimizing for size, ignore non-minsize functions.
376 if (Opt.SizeOnly && !ParentFn->hasMinSize())
377 continue;
378
379 size_t UGSIdx = GlobalUsesByFunction[ParentFn];
380
381 // If this is the first global the function uses, map it to the set
382 // consisting of this global only.
383 if (!UGSIdx) {
384 // If that set doesn't exist yet, create it.
385 if (!CurGVOnlySetIdx) {
386 CurGVOnlySetIdx = UsedGlobalSets.size();
387 CreateGlobalSet().Globals.set(GI);
388 } else {
389 ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
390 }
391
392 GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
393 continue;
394 }
395
396 // If we already encountered a use of this global in this function, just
397 // increment the counter.
398 if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
399 ++UsedGlobalSets[UGSIdx].UsageCount;
400 continue;
401 }
402
403 // If not, the previous set wasn't actually used in this function.
404 --UsedGlobalSets[UGSIdx].UsageCount;
405
406 // If we already expanded the previous set to include this global, just
407 // reuse that expanded set.
408 if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
409 ++UsedGlobalSets[ExpandedIdx].UsageCount;
410 GlobalUsesByFunction[ParentFn] = ExpandedIdx;
411 continue;
412 }
413
414 // If not, create a new set consisting of the union of the previous set
415 // and this global. Mark it as encountered, so we can reuse it later.
416 GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
417 UsedGlobalSets.size();
418
419 UsedGlobalSet &NewUGS = CreateGlobalSet();
420 NewUGS.Globals.set(GI);
421 NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
422 }
423 }
424 }
425
426 // We can choose to merge all globals together, but ignore globals never used
427 // with another global. This catches the obviously non-profitable cases of
428 // having a single global, but is aggressive enough for any other case.
430 BitVector AllGlobals(Globals.size());
431 for (const UsedGlobalSet &UGS : UsedGlobalSets) {
432 if (UGS.UsageCount == 0)
433 continue;
434 if (UGS.Globals.count() > 1)
435 AllGlobals |= UGS.Globals;
436 }
437 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
438 }
439
440 // Now we found a bunch of sets of globals used together. We accumulated
441 // the number of times we encountered the sets (i.e., the number of functions
442 // that use that exact set of globals).
443 //
444 // Multiply that by the size of the set to give us a crude profitability
445 // metric.
446 llvm::stable_sort(UsedGlobalSets,
447 [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
448 return UGS1.Globals.count() * UGS1.UsageCount <
449 UGS2.Globals.count() * UGS2.UsageCount;
450 });
451
452 // Starting from the sets with the best (=biggest) profitability, find a
453 // good combination.
454 // The ideal (and expensive) solution can only be found by trying all
455 // combinations, looking for the one with the best profitability.
456 // Don't be smart about it, and just pick the first compatible combination,
457 // starting with the sets with the best profitability.
458 BitVector PickedGlobals(Globals.size());
459 bool Changed = false;
460
461 for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
462 if (UGS.UsageCount == 0)
463 continue;
464 if (PickedGlobals.anyCommon(UGS.Globals))
465 continue;
466 PickedGlobals |= UGS.Globals;
467 // If the set only contains one global, there's no point in merging.
468 // Ignore the global for inclusion in other sets though, so keep it in
469 // PickedGlobals.
470 if (UGS.Globals.count() < 2)
471 continue;
472 Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace);
473 }
474
475 return Changed;
476}
477
478bool GlobalMergeImpl::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
479 const BitVector &GlobalSet, Module &M,
480 bool isConst, unsigned AddrSpace) const {
481 assert(Globals.size() > 1);
482
483 Type *Int32Ty = Type::getInt32Ty(M.getContext());
484 Type *Int8Ty = Type::getInt8Ty(M.getContext());
485 auto &DL = M.getDataLayout();
486
487 LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
488 << GlobalSet.find_first() << ", total of " << Globals.size()
489 << "\n");
490
491 bool Changed = false;
492 ssize_t i = GlobalSet.find_first();
493 while (i != -1) {
494 ssize_t j = 0;
495 uint64_t MergedSize = 0;
496 std::vector<Type*> Tys;
497 std::vector<Constant*> Inits;
498 std::vector<unsigned> StructIdxs;
499
500 bool HasExternal = false;
501 StringRef FirstExternalName;
502 Align MaxAlign;
503 unsigned CurIdx = 0;
504 for (j = i; j != -1; j = GlobalSet.find_next(j)) {
505 Type *Ty = Globals[j]->getValueType();
506
507 // Make sure we use the same alignment AsmPrinter would use.
508 Align Alignment = DL.getPreferredAlign(Globals[j]);
509 unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
510 MergedSize += Padding;
511 MergedSize += DL.getTypeAllocSize(Ty);
512 if (MergedSize > Opt.MaxOffset) {
513 break;
514 }
515 if (Padding) {
516 Tys.push_back(ArrayType::get(Int8Ty, Padding));
517 Inits.push_back(ConstantAggregateZero::get(Tys.back()));
518 ++CurIdx;
519 }
520 Tys.push_back(Ty);
521 Inits.push_back(Globals[j]->getInitializer());
522 StructIdxs.push_back(CurIdx++);
523
524 MaxAlign = std::max(MaxAlign, Alignment);
525
526 if (Globals[j]->hasExternalLinkage() && !HasExternal) {
527 HasExternal = true;
528 FirstExternalName = Globals[j]->getName();
529 }
530 }
531
532 // Exit early if there is only one global to merge.
533 if (Tys.size() < 2) {
534 i = j;
535 continue;
536 }
537
538 // If merged variables doesn't have external linkage, we needn't to expose
539 // the symbol after merging.
543 // Use a packed struct so we can control alignment.
544 StructType *MergedTy = StructType::get(M.getContext(), Tys, true);
545 Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
546
547 // On Darwin external linkage needs to be preserved, otherwise
548 // dsymutil cannot preserve the debug info for the merged
549 // variables. If they have external linkage, use the symbol name
550 // of the first variable merged as the suffix of global symbol
551 // name. This avoids a link-time naming conflict for the
552 // _MergedGlobals symbols.
553 Twine MergedName =
554 (IsMachO && HasExternal)
555 ? "_MergedGlobals_" + FirstExternalName
556 : "_MergedGlobals";
557 auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage;
558 auto *MergedGV = new GlobalVariable(
559 M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
561
562 MergedGV->setAlignment(MaxAlign);
563 MergedGV->setSection(Globals[i]->getSection());
564 MergedGV->setComdat(Globals[i]->getComdat());
565
566 LLVM_DEBUG(dbgs() << "MergedGV: " << *MergedGV << "\n");
567
568 const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
569 for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
570 GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
571 std::string Name(Globals[k]->getName());
572 GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
574 Globals[k]->getDLLStorageClass();
575
576 // Copy metadata while adjusting any debug info metadata by the original
577 // global's offset within the merged global.
578 MergedGV->copyMetadata(Globals[k],
579 MergedLayout->getElementOffset(StructIdxs[idx]));
580
581 Constant *Idx[2] = {
582 ConstantInt::get(Int32Ty, 0),
583 ConstantInt::get(Int32Ty, StructIdxs[idx]),
584 };
585 Constant *GEP =
586 ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
587 Globals[k]->replaceAllUsesWith(GEP);
588 Globals[k]->eraseFromParent();
589
590 // Emit an alias for the original variable name. This is necessary for an
591 // external symbol, as it may be accessed from another object. For
592 // internal symbols, it's not strictly required, but it's useful.
593 //
594 // This _should_ also work on Mach-O ever since '.alt_entry' support was
595 // added in 2016. Unfortunately, there's a bug in ld-prime (present at
596 // least from Xcode 15.0 through Xcode 16.0), in which -dead_strip doesn't
597 // always honor alt_entry. To workaround this issue, we don't emit aliases
598 // on Mach-O. Except, we _must_ do so for external symbols. That means
599 // MergeExternal is broken with that linker. (That option is currently off
600 // by default on MachO).
601 if (!IsMachO || Linkage == GlobalValue::ExternalLinkage) {
602 GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
603 Linkage, Name, GEP, &M);
604 GA->setVisibility(Visibility);
605 GA->setDLLStorageClass(DLLStorage);
606 }
607
608 NumMerged++;
609 }
611 Changed = true;
612 i = j;
613 }
614
615 return Changed;
616}
617
618void GlobalMergeImpl::collectUsedGlobalVariables(Module &M, StringRef Name) {
619 // Extract global variables from llvm.used array
620 const GlobalVariable *GV = M.getGlobalVariable(Name);
621 if (!GV || !GV->hasInitializer()) return;
622
623 // Should be an array of 'i8*'.
624 const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
625
626 for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
627 if (const GlobalVariable *G =
629 MustKeepGlobalVariables.insert(G);
630}
631
632void GlobalMergeImpl::setMustKeepGlobalVariables(Module &M) {
633 collectUsedGlobalVariables(M, "llvm.used");
634 collectUsedGlobalVariables(M, "llvm.compiler.used");
635
636 for (Function &F : M) {
637 for (BasicBlock &BB : F) {
638 BasicBlock::iterator Pad = BB.getFirstNonPHIIt();
639 auto *II = dyn_cast<IntrinsicInst>(Pad);
640 if (!Pad->isEHPad() &&
641 !(II && II->getIntrinsicID() == Intrinsic::eh_typeid_for))
642 continue;
643
644 // Keep globals used by landingpads, catchpads,
645 // or intrinsics that require a plain global.
646 for (const Use &U : Pad->operands()) {
647 if (const GlobalVariable *GV =
648 dyn_cast<GlobalVariable>(U->stripPointerCasts()))
649 MustKeepGlobalVariables.insert(GV);
650 else if (const ConstantArray *CA = dyn_cast<ConstantArray>(U->stripPointerCasts())) {
651 for (const Use &Elt : CA->operands()) {
652 if (const GlobalVariable *GV =
653 dyn_cast<GlobalVariable>(Elt->stripPointerCasts()))
654 MustKeepGlobalVariables.insert(GV);
655 }
656 }
657 }
658 }
659 }
660}
661
662// This function returns true if the given data Section name has custom
663// subsection-splitting semantics in Mach-O (such as splitting by a fixed size)
664//
665// See also ObjFile::parseSections and getRecordSize in lld/MachO/InputFiles.cpp
666static bool isSpecialMachOSection(StringRef Section) {
667 // Uses starts_with, since section attributes can appear at the end of the
668 // name.
669 return Section.starts_with("__DATA,__cfstring") ||
670 Section.starts_with("__DATA,__objc_classrefs") ||
671 Section.starts_with("__DATA,__objc_selrefs");
672}
673
674bool GlobalMergeImpl::run(Module &M) {
676 return false;
677
678 IsMachO = M.getTargetTriple().isOSBinFormatMachO();
679
680 auto &DL = M.getDataLayout();
683 Globals, ConstGlobals, BSSGlobals;
684 bool Changed = false;
685 setMustKeepGlobalVariables(M);
686
687 LLVM_DEBUG({
688 dbgs() << "Number of GV that must be kept: " <<
689 MustKeepGlobalVariables.size() << "\n";
690 for (const GlobalVariable *KeptGV : MustKeepGlobalVariables)
691 dbgs() << "Kept: " << *KeptGV << "\n";
692 });
693 // Grab all non-const globals.
694 for (auto &GV : M.globals()) {
695 // Merge is safe for "normal" internal or external globals only
696 if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
697 continue;
698
699 // It's not safe to merge globals that may be preempted
700 if (TM && !TM->shouldAssumeDSOLocal(&GV))
701 continue;
702
703 if (!(Opt.MergeExternal && GV.hasExternalLinkage()) &&
704 !GV.hasLocalLinkage())
705 continue;
706
707 PointerType *PT = GV.getType();
708 unsigned AddressSpace = PT->getAddressSpace();
710
711 // On Mach-O, some section names have special semantics. Don't merge these.
712 if (IsMachO && isSpecialMachOSection(Section))
713 continue;
714
715 // Ignore all 'special' globals.
716 if (GV.getName().starts_with("llvm.") ||
717 GV.getName().starts_with(".llvm.") || Section == "llvm.metadata")
718 continue;
719
720 // Ignore all "required" globals:
721 if (isMustKeepGlobalVariable(&GV))
722 continue;
723
724 // Don't merge tagged globals, as each global should have its own unique
725 // memory tag at runtime. TODO(hctim): This can be relaxed: constant globals
726 // with compatible alignment and the same contents may be merged as long as
727 // the globals occupy the same number of tag granules (i.e. `size_a / 16 ==
728 // size_b / 16`).
729 if (GV.isTagged())
730 continue;
731
732 // Don't merge globals with metadata other than !dbg or !guid, as this is
733 // essentially equivalent to adding metadata to an existing global, which is
734 // not necessarily a correct transformation depending on the specific
735 // metadata's semantics. We will later use copyMetadata() to copy metadata
736 // from component globals to the combined global, which only knows how to do
737 // this correctly for !dbg (and !type, but by this point LowerTypeTests will
738 // have already run).
739 // Note that a new !guid will be created as part of doMerge
741 continue;
742
743 Type *Ty = GV.getValueType();
744 TypeSize AllocSize = DL.getTypeAllocSize(Ty);
745 bool CanMerge = AllocSize < Opt.MaxOffset && AllocSize >= Opt.MinSize;
746 if (CanMerge) {
747 if (TM &&
749 BSSGlobals[{AddressSpace, Section, GV.getComdat()}].push_back(&GV);
750 else if (GV.isConstant())
751 ConstGlobals[{AddressSpace, Section, GV.getComdat()}].push_back(&GV);
752 else
753 Globals[{AddressSpace, Section, GV.getComdat()}].push_back(&GV);
754 }
755 LLVM_DEBUG(dbgs() << "GV " << (CanMerge ? "" : "not ") << "to merge: " << GV
756 << "\n");
757 }
758
759 for (auto &P : Globals)
760 if (P.second.size() > 1)
761 Changed |= doMerge(P.second, M, false, std::get<0>(P.first));
762
763 for (auto &P : BSSGlobals)
764 if (P.second.size() > 1)
765 Changed |= doMerge(P.second, M, false, std::get<0>(P.first));
766
767 if (Opt.MergeConstantGlobals)
768 for (auto &P : ConstGlobals)
769 if (P.second.size() > 1)
770 Changed |= doMerge(P.second, M, true, std::get<0>(P.first));
771
772 return Changed;
773}
774
776 bool OnlyOptimizeForSize,
777 bool MergeExternalByDefault,
778 bool MergeConstantByDefault,
779 bool MergeConstAggressiveByDefault) {
780 bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
781 MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
782 bool MergeConstant = EnableGlobalMergeOnConst || MergeConstantByDefault;
783 bool MergeConstAggressive = GlobalMergeAllConst.getNumOccurrences() > 0
785 : MergeConstAggressiveByDefault;
786 unsigned PreferOffset = GlobalMergeMaxOffset.getNumOccurrences() > 0
788 : Offset;
789 return new GlobalMerge(TM, PreferOffset, OnlyOptimizeForSize, MergeExternal,
790 MergeConstant, MergeConstAggressive);
791}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
CanMerge
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DXIL Finalize Linkage
dxil translate DXIL Translate Metadata
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
static cl::opt< bool > GlobalMergeIgnoreSingleUse("global-merge-ignore-single-use", cl::Hidden, cl::desc("Improve global merge pass to ignore globals only used alone"), cl::init(true))
static cl::opt< bool > GlobalMergeGroupByUse("global-merge-group-by-use", cl::Hidden, cl::desc("Improve global merge pass to look at uses"), cl::init(true))
static bool isSpecialMachOSection(StringRef Section)
static cl::opt< bool > GlobalMergeAllConst("global-merge-all-const", cl::Hidden, cl::desc("Merge all const globals without looking at uses"), cl::init(false))
static cl::opt< bool > EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), cl::init(false))
static cl::opt< unsigned > GlobalMergeMinDataSize("global-merge-min-data-size", cl::desc("The minimum size in bytes of each global " "that should considered in merging."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden, cl::desc("Set maximum offset for global merge pass"), cl::init(0))
static cl::opt< bool > EnableGlobalMerge("enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass"), cl::init(true))
static cl::opt< cl::boolOrDefault > EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"))
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
This defines the Use class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
uint64_t IntrinsicInst * II
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static StringRef getName(Value *V)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
static void assignGUIDForMergedGV(GlobalVariable &GV)
LLVM Basic Block Representation.
Definition BasicBlock.h:62
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
Definition BitVector.h:317
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
Definition BitVector.h:324
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
ConstantArray - Constant Array Declarations.
Definition Constants.h:584
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1308
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition Constants.h:1499
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition Globals.cpp:676
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
StringRef getSection() const
Get the custom section of this global if it has one.
const Comdat * getComdat() const
LLVM_ABI bool hasMetadataOtherThanDebugLocAndGuid() const
Definition Globals.cpp:455
bool hasExternalLinkage() const
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:392
bool hasLocalLinkage() const
bool isTagged() const
void setDLLStorageClass(DLLStorageClassTypes C)
DLLStorageClassTypes
Storage classes of global values for PE targets.
Definition GlobalValue.h:74
Module * getParent()
Get the module that this global value is contained inside of...
PointerType * getType() const
Global values are always pointers.
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition GlobalValue.h:67
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition GlobalValue.h:52
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition GlobalValue.h:61
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool hasImplicitSection() const
Check if section name is present.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
Definition Globals.cpp:624
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:68
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
Class to represent pointers.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition DataLayout.h:743
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:774
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:479
static SectionKind getKindForGlobal(const GlobalObject *GO, const TargetMachine &TM)
Classify the specified global variable into a set of target independent categories embodied in Sectio...
Primary interface to the complete machine description for the target machine.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
Changed
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:50
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
Expected< const typename ELFT::Shdr * > getSection(typename ELFT::ShdrRange Sections, uint32_t Index)
Definition ELF.h:608
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
void stable_sort(R &&Range)
Definition STLExtras.h:2115
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:328
LLVM_ABI Pass * createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, bool OnlyOptimizeForSize=false, bool MergeExternalByDefault=false, bool MergeConstantByDefault=false, bool MergeConstAggressiveByDefault=false)
GlobalMerge - This pass merges internal (by default) globals into structs to enable reuse of a base p...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
LLVM_ABI GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition Module.cpp:898
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
bool MergeConstAggressive
Whether we should merge constant global variables aggressively without looking at use.
Definition GlobalMerge.h:34
bool SizeOnly
Whether we should try to optimize for size only.
Definition GlobalMerge.h:39
bool MergeExternal
Whether we should merge global variables that have external linkage.
Definition GlobalMerge.h:29
bool MergeConstantGlobals
Whether we should merge constant global variables.
Definition GlobalMerge.h:31