LLVM 19.0.0git
GlobalMerge.cpp
Go to the documentation of this file.
1//===- GlobalMerge.cpp - Internal globals merging -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges globals with internal linkage into one. This way all the
10// globals which were merged into a biggest one can be addressed using offsets
11// from the same base pointer (no need for separate base pointer for each of the
12// global). Such a transformation can significantly reduce the register pressure
13// when many globals are involved.
14//
15// For example, consider the code which touches several global variables at
16// once:
17//
18// static int foo[N], bar[N], baz[N];
19//
20// for (i = 0; i < N; ++i) {
21// foo[i] = bar[i] * baz[i];
22// }
23//
24// On ARM the addresses of 3 arrays should be kept in the registers, thus
25// this code has quite large register pressure (loop body):
26//
27// ldr r1, [r5], #4
28// ldr r2, [r6], #4
29// mul r1, r2, r1
30// str r1, [r0], #4
31//
32// Pass converts the code to something like:
33//
34// static struct {
35// int foo[N];
36// int bar[N];
37// int baz[N];
38// } merged;
39//
40// for (i = 0; i < N; ++i) {
41// merged.foo[i] = merged.bar[i] * merged.baz[i];
42// }
43//
44// and in ARM code this becomes:
45//
46// ldr r0, [r5, #40]
47// ldr r1, [r5, #80]
48// mul r0, r1, r0
49// str r0, [r5], #4
50//
51// note that we saved 2 registers here almostly "for free".
52//
53// However, merging globals can have tradeoffs:
54// - it confuses debuggers, tools, and users
55// - it makes linker optimizations less useful (order files, LOHs, ...)
56// - it forces usage of indexed addressing (which isn't necessarily "free")
57// - it can increase register pressure when the uses are disparate enough.
58//
59// We use heuristics to discover the best global grouping we can (cf cl::opts).
60//
61// ===---------------------------------------------------------------------===//
62
64#include "llvm/ADT/BitVector.h"
65#include "llvm/ADT/DenseMap.h"
66#include "llvm/ADT/SetVector.h"
68#include "llvm/ADT/Statistic.h"
69#include "llvm/ADT/StringRef.h"
70#include "llvm/ADT/Twine.h"
71#include "llvm/CodeGen/Passes.h"
72#include "llvm/IR/BasicBlock.h"
73#include "llvm/IR/Constants.h"
74#include "llvm/IR/DataLayout.h"
76#include "llvm/IR/Function.h"
77#include "llvm/IR/GlobalAlias.h"
78#include "llvm/IR/GlobalValue.h"
80#include "llvm/IR/Instruction.h"
81#include "llvm/IR/Module.h"
82#include "llvm/IR/Type.h"
83#include "llvm/IR/Use.h"
84#include "llvm/IR/User.h"
86#include "llvm/MC/SectionKind.h"
87#include "llvm/Pass.h"
90#include "llvm/Support/Debug.h"
95#include <algorithm>
96#include <cassert>
97#include <cstddef>
98#include <cstdint>
99#include <string>
100#include <vector>
101
102using namespace llvm;
103
104#define DEBUG_TYPE "global-merge"
105
106// FIXME: This is only useful as a last-resort way to disable the pass.
107static cl::opt<bool>
108EnableGlobalMerge("enable-global-merge", cl::Hidden,
109 cl::desc("Enable the global merge pass"),
110 cl::init(true));
111
113GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
114 cl::desc("Set maximum offset for global merge pass"),
115 cl::init(0));
116
118 "global-merge-group-by-use", cl::Hidden,
119 cl::desc("Improve global merge pass to look at uses"), cl::init(true));
120
122 "global-merge-ignore-single-use", cl::Hidden,
123 cl::desc("Improve global merge pass to ignore globals only used alone"),
124 cl::init(true));
125
126static cl::opt<bool>
127EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
128 cl::desc("Enable global merge pass on constants"),
129 cl::init(false));
130
131// FIXME: this could be a transitional option, and we probably need to remove
132// it if only we are sure this optimization could always benefit all targets.
134EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
135 cl::desc("Enable global merge pass on external linkage"));
136
137STATISTIC(NumMerged, "Number of globals merged");
138
139namespace {
140
141class GlobalMergeImpl {
142 const TargetMachine *TM = nullptr;
144 bool IsMachO = false;
145
146private:
147 bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals, Module &M,
148 bool isConst, unsigned AddrSpace) const;
149
150 /// Merge everything in \p Globals for which the corresponding bit
151 /// in \p GlobalSet is set.
152 bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
153 const BitVector &GlobalSet, Module &M, bool isConst,
154 unsigned AddrSpace) const;
155
156 /// Check if the given variable has been identified as must keep
157 /// \pre setMustKeepGlobalVariables must have been called on the Module that
158 /// contains GV
159 bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
160 return MustKeepGlobalVariables.count(GV);
161 }
162
163 /// Collect every variables marked as "used" or used in a landing pad
164 /// instruction for this Module.
165 void setMustKeepGlobalVariables(Module &M);
166
167 /// Collect every variables marked as "used"
169
170 /// Keep track of the GlobalVariable that must not be merged away
171 SmallSetVector<const GlobalVariable *, 16> MustKeepGlobalVariables;
172
173public:
174 GlobalMergeImpl(const TargetMachine *TM, GlobalMergeOptions Opt)
175 : TM(TM), Opt(Opt) {}
176 bool run(Module &M);
177};
178
179class GlobalMerge : public FunctionPass {
180 const TargetMachine *TM = nullptr;
182
183public:
184 static char ID; // Pass identification, replacement for typeid.
185
186 explicit GlobalMerge() : FunctionPass(ID) {
189 }
190
191 explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
192 bool OnlyOptimizeForSize, bool MergeExternalGlobals)
193 : FunctionPass(ID), TM(TM) {
194 Opt.MaxOffset = MaximalOffset;
195 Opt.SizeOnly = OnlyOptimizeForSize;
196 Opt.MergeExternal = MergeExternalGlobals;
198 }
199
200 bool doInitialization(Module &M) override {
201 GlobalMergeImpl P(TM, Opt);
202 return P.run(M);
203 }
204 bool runOnFunction(Function &F) override { return false; }
205
206 StringRef getPassName() const override { return "Merge internal globals"; }
207
208 void getAnalysisUsage(AnalysisUsage &AU) const override {
209 AU.setPreservesCFG();
211 }
212};
213
214} // end anonymous namespace
215
217 GlobalMergeImpl P(TM, Options);
218 bool Changed = P.run(M);
219 if (!Changed)
220 return PreservedAnalyses::all();
221
224 return PA;
225}
226
227char GlobalMerge::ID = 0;
228
229INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
230
231bool GlobalMergeImpl::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
232 Module &M, bool isConst,
233 unsigned AddrSpace) const {
234 auto &DL = M.getDataLayout();
235 // FIXME: Find better heuristics
237 Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
238 // We don't support scalable global variables.
239 return DL.getTypeAllocSize(GV1->getValueType()).getFixedValue() <
240 DL.getTypeAllocSize(GV2->getValueType()).getFixedValue();
241 });
242
243 // If we want to just blindly group all globals together, do so.
245 BitVector AllGlobals(Globals.size());
246 AllGlobals.set();
247 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
248 }
249
250 // If we want to be smarter, look at all uses of each global, to try to
251 // discover all sets of globals used together, and how many times each of
252 // these sets occurred.
253 //
254 // Keep this reasonably efficient, by having an append-only list of all sets
255 // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
256 // code (currently, a Function) to the set of globals seen so far that are
257 // used together in that unit (GlobalUsesByFunction).
258 //
259 // When we look at the Nth global, we know that any new set is either:
260 // - the singleton set {N}, containing this global only, or
261 // - the union of {N} and a previously-discovered set, containing some
262 // combination of the previous N-1 globals.
263 // Using that knowledge, when looking at the Nth global, we can keep:
264 // - a reference to the singleton set {N} (CurGVOnlySetIdx)
265 // - a list mapping each previous set to its union with {N} (EncounteredUGS),
266 // if it actually occurs.
267
268 // We keep track of the sets of globals used together "close enough".
269 struct UsedGlobalSet {
270 BitVector Globals;
271 unsigned UsageCount = 1;
272
273 UsedGlobalSet(size_t Size) : Globals(Size) {}
274 };
275
276 // Each set is unique in UsedGlobalSets.
277 std::vector<UsedGlobalSet> UsedGlobalSets;
278
279 // Avoid repeating the create-global-set pattern.
280 auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
281 UsedGlobalSets.emplace_back(Globals.size());
282 return UsedGlobalSets.back();
283 };
284
285 // The first set is the empty set.
286 CreateGlobalSet().UsageCount = 0;
287
288 // We define "close enough" to be "in the same function".
289 // FIXME: Grouping uses by function is way too aggressive, so we should have
290 // a better metric for distance between uses.
291 // The obvious alternative would be to group by BasicBlock, but that's in
292 // turn too conservative..
293 // Anything in between wouldn't be trivial to compute, so just stick with
294 // per-function grouping.
295
296 // The value type is an index into UsedGlobalSets.
297 // The default (0) conveniently points to the empty set.
298 DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
299
300 // Now, look at each merge-eligible global in turn.
301
302 // Keep track of the sets we already encountered to which we added the
303 // current global.
304 // Each element matches the same-index element in UsedGlobalSets.
305 // This lets us efficiently tell whether a set has already been expanded to
306 // include the current global.
307 std::vector<size_t> EncounteredUGS;
308
309 for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
310 GlobalVariable *GV = Globals[GI];
311
312 // Reset the encountered sets for this global...
313 std::fill(EncounteredUGS.begin(), EncounteredUGS.end(), 0);
314 // ...and grow it in case we created new sets for the previous global.
315 EncounteredUGS.resize(UsedGlobalSets.size());
316
317 // We might need to create a set that only consists of the current global.
318 // Keep track of its index into UsedGlobalSets.
319 size_t CurGVOnlySetIdx = 0;
320
321 // For each global, look at all its Uses.
322 for (auto &U : GV->uses()) {
323 // This Use might be a ConstantExpr. We're interested in Instruction
324 // users, so look through ConstantExpr...
325 Use *UI, *UE;
326 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
327 if (CE->use_empty())
328 continue;
329 UI = &*CE->use_begin();
330 UE = nullptr;
331 } else if (isa<Instruction>(U.getUser())) {
332 UI = &U;
333 UE = UI->getNext();
334 } else {
335 continue;
336 }
337
338 // ...to iterate on all the instruction users of the global.
339 // Note that we iterate on Uses and not on Users to be able to getNext().
340 for (; UI != UE; UI = UI->getNext()) {
341 Instruction *I = dyn_cast<Instruction>(UI->getUser());
342 if (!I)
343 continue;
344
345 Function *ParentFn = I->getParent()->getParent();
346
347 // If we're only optimizing for size, ignore non-minsize functions.
348 if (Opt.SizeOnly && !ParentFn->hasMinSize())
349 continue;
350
351 size_t UGSIdx = GlobalUsesByFunction[ParentFn];
352
353 // If this is the first global the basic block uses, map it to the set
354 // consisting of this global only.
355 if (!UGSIdx) {
356 // If that set doesn't exist yet, create it.
357 if (!CurGVOnlySetIdx) {
358 CurGVOnlySetIdx = UsedGlobalSets.size();
359 CreateGlobalSet().Globals.set(GI);
360 } else {
361 ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
362 }
363
364 GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
365 continue;
366 }
367
368 // If we already encountered this BB, just increment the counter.
369 if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
370 ++UsedGlobalSets[UGSIdx].UsageCount;
371 continue;
372 }
373
374 // If not, the previous set wasn't actually used in this function.
375 --UsedGlobalSets[UGSIdx].UsageCount;
376
377 // If we already expanded the previous set to include this global, just
378 // reuse that expanded set.
379 if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
380 ++UsedGlobalSets[ExpandedIdx].UsageCount;
381 GlobalUsesByFunction[ParentFn] = ExpandedIdx;
382 continue;
383 }
384
385 // If not, create a new set consisting of the union of the previous set
386 // and this global. Mark it as encountered, so we can reuse it later.
387 GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
388 UsedGlobalSets.size();
389
390 UsedGlobalSet &NewUGS = CreateGlobalSet();
391 NewUGS.Globals.set(GI);
392 NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
393 }
394 }
395 }
396
397 // Now we found a bunch of sets of globals used together. We accumulated
398 // the number of times we encountered the sets (i.e., the number of blocks
399 // that use that exact set of globals).
400 //
401 // Multiply that by the size of the set to give us a crude profitability
402 // metric.
403 llvm::stable_sort(UsedGlobalSets,
404 [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
405 return UGS1.Globals.count() * UGS1.UsageCount <
406 UGS2.Globals.count() * UGS2.UsageCount;
407 });
408
409 // We can choose to merge all globals together, but ignore globals never used
410 // with another global. This catches the obviously non-profitable cases of
411 // having a single global, but is aggressive enough for any other case.
413 BitVector AllGlobals(Globals.size());
414 for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
415 if (UGS.UsageCount == 0)
416 continue;
417 if (UGS.Globals.count() > 1)
418 AllGlobals |= UGS.Globals;
419 }
420 return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
421 }
422
423 // Starting from the sets with the best (=biggest) profitability, find a
424 // good combination.
425 // The ideal (and expensive) solution can only be found by trying all
426 // combinations, looking for the one with the best profitability.
427 // Don't be smart about it, and just pick the first compatible combination,
428 // starting with the sets with the best profitability.
429 BitVector PickedGlobals(Globals.size());
430 bool Changed = false;
431
432 for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
433 if (UGS.UsageCount == 0)
434 continue;
435 if (PickedGlobals.anyCommon(UGS.Globals))
436 continue;
437 PickedGlobals |= UGS.Globals;
438 // If the set only contains one global, there's no point in merging.
439 // Ignore the global for inclusion in other sets though, so keep it in
440 // PickedGlobals.
441 if (UGS.Globals.count() < 2)
442 continue;
443 Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace);
444 }
445
446 return Changed;
447}
448
449bool GlobalMergeImpl::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
450 const BitVector &GlobalSet, Module &M,
451 bool isConst, unsigned AddrSpace) const {
452 assert(Globals.size() > 1);
453
454 Type *Int32Ty = Type::getInt32Ty(M.getContext());
455 Type *Int8Ty = Type::getInt8Ty(M.getContext());
456 auto &DL = M.getDataLayout();
457
458 LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
459 << GlobalSet.find_first() << "\n");
460
461 bool Changed = false;
462 ssize_t i = GlobalSet.find_first();
463 while (i != -1) {
464 ssize_t j = 0;
465 uint64_t MergedSize = 0;
466 std::vector<Type*> Tys;
467 std::vector<Constant*> Inits;
468 std::vector<unsigned> StructIdxs;
469
470 bool HasExternal = false;
471 StringRef FirstExternalName;
472 Align MaxAlign;
473 unsigned CurIdx = 0;
474 for (j = i; j != -1; j = GlobalSet.find_next(j)) {
475 Type *Ty = Globals[j]->getValueType();
476
477 // Make sure we use the same alignment AsmPrinter would use.
478 Align Alignment = DL.getPreferredAlign(Globals[j]);
479 unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
480 MergedSize += Padding;
481 MergedSize += DL.getTypeAllocSize(Ty);
482 if (MergedSize > Opt.MaxOffset) {
483 break;
484 }
485 if (Padding) {
486 Tys.push_back(ArrayType::get(Int8Ty, Padding));
487 Inits.push_back(ConstantAggregateZero::get(Tys.back()));
488 ++CurIdx;
489 }
490 Tys.push_back(Ty);
491 Inits.push_back(Globals[j]->getInitializer());
492 StructIdxs.push_back(CurIdx++);
493
494 MaxAlign = std::max(MaxAlign, Alignment);
495
496 if (Globals[j]->hasExternalLinkage() && !HasExternal) {
497 HasExternal = true;
498 FirstExternalName = Globals[j]->getName();
499 }
500 }
501
502 // Exit early if there is only one global to merge.
503 if (Tys.size() < 2) {
504 i = j;
505 continue;
506 }
507
508 // If merged variables doesn't have external linkage, we needn't to expose
509 // the symbol after merging.
513 // Use a packed struct so we can control alignment.
514 StructType *MergedTy = StructType::get(M.getContext(), Tys, true);
515 Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
516
517 // On Darwin external linkage needs to be preserved, otherwise
518 // dsymutil cannot preserve the debug info for the merged
519 // variables. If they have external linkage, use the symbol name
520 // of the first variable merged as the suffix of global symbol
521 // name. This avoids a link-time naming conflict for the
522 // _MergedGlobals symbols.
523 Twine MergedName =
524 (IsMachO && HasExternal)
525 ? "_MergedGlobals_" + FirstExternalName
526 : "_MergedGlobals";
527 auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage;
528 auto *MergedGV = new GlobalVariable(
529 M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
531
532 MergedGV->setAlignment(MaxAlign);
533 MergedGV->setSection(Globals[i]->getSection());
534
535 const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
536 for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
537 GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
538 std::string Name(Globals[k]->getName());
539 GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
541 Globals[k]->getDLLStorageClass();
542
543 // Copy metadata while adjusting any debug info metadata by the original
544 // global's offset within the merged global.
545 MergedGV->copyMetadata(Globals[k],
546 MergedLayout->getElementOffset(StructIdxs[idx]));
547
548 Constant *Idx[2] = {
550 ConstantInt::get(Int32Ty, StructIdxs[idx]),
551 };
552 Constant *GEP =
553 ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
554 Globals[k]->replaceAllUsesWith(GEP);
555 Globals[k]->eraseFromParent();
556
557 // When the linkage is not internal we must emit an alias for the original
558 // variable name as it may be accessed from another object. On non-Mach-O
559 // we can also emit an alias for internal linkage as it's safe to do so.
560 // It's not safe on Mach-O as the alias (and thus the portion of the
561 // MergedGlobals variable) may be dead stripped at link time.
562 if (Linkage != GlobalValue::InternalLinkage || !IsMachO) {
563 GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
564 Linkage, Name, GEP, &M);
565 GA->setVisibility(Visibility);
566 GA->setDLLStorageClass(DLLStorage);
567 }
568
569 NumMerged++;
570 }
571 Changed = true;
572 i = j;
573 }
574
575 return Changed;
576}
577
578void GlobalMergeImpl::collectUsedGlobalVariables(Module &M, StringRef Name) {
579 // Extract global variables from llvm.used array
580 const GlobalVariable *GV = M.getGlobalVariable(Name);
581 if (!GV || !GV->hasInitializer()) return;
582
583 // Should be an array of 'i8*'.
584 const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
585
586 for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
587 if (const GlobalVariable *G =
588 dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
589 MustKeepGlobalVariables.insert(G);
590}
591
592void GlobalMergeImpl::setMustKeepGlobalVariables(Module &M) {
593 collectUsedGlobalVariables(M, "llvm.used");
594 collectUsedGlobalVariables(M, "llvm.compiler.used");
595
596 for (Function &F : M) {
597 for (BasicBlock &BB : F) {
598 Instruction *Pad = BB.getFirstNonPHI();
599 if (!Pad->isEHPad())
600 continue;
601
602 // Keep globals used by landingpads and catchpads.
603 for (const Use &U : Pad->operands()) {
604 if (const GlobalVariable *GV =
605 dyn_cast<GlobalVariable>(U->stripPointerCasts()))
606 MustKeepGlobalVariables.insert(GV);
607 else if (const ConstantArray *CA = dyn_cast<ConstantArray>(U->stripPointerCasts())) {
608 for (const Use &Elt : CA->operands()) {
609 if (const GlobalVariable *GV =
610 dyn_cast<GlobalVariable>(Elt->stripPointerCasts()))
611 MustKeepGlobalVariables.insert(GV);
612 }
613 }
614 }
615 }
616 }
617}
618
619bool GlobalMergeImpl::run(Module &M) {
621 return false;
622
623 IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
624
625 auto &DL = M.getDataLayout();
627 Globals, ConstGlobals, BSSGlobals;
628 bool Changed = false;
629 setMustKeepGlobalVariables(M);
630
631 LLVM_DEBUG({
632 dbgs() << "Number of GV that must be kept: " <<
633 MustKeepGlobalVariables.size() << "\n";
634 for (const GlobalVariable *KeptGV : MustKeepGlobalVariables)
635 dbgs() << "Kept: " << *KeptGV << "\n";
636 });
637 // Grab all non-const globals.
638 for (auto &GV : M.globals()) {
639 // Merge is safe for "normal" internal or external globals only
640 if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
641 continue;
642
643 // It's not safe to merge globals that may be preempted
644 if (TM && !TM->shouldAssumeDSOLocal(M, &GV))
645 continue;
646
647 if (!(Opt.MergeExternal && GV.hasExternalLinkage()) &&
648 !GV.hasInternalLinkage())
649 continue;
650
651 PointerType *PT = dyn_cast<PointerType>(GV.getType());
652 assert(PT && "Global variable is not a pointer!");
653
654 unsigned AddressSpace = PT->getAddressSpace();
656
657 // Ignore all 'special' globals.
658 if (GV.getName().starts_with("llvm.") || GV.getName().starts_with(".llvm."))
659 continue;
660
661 // Ignore all "required" globals:
662 if (isMustKeepGlobalVariable(&GV))
663 continue;
664
665 // Don't merge tagged globals, as each global should have its own unique
666 // memory tag at runtime. TODO(hctim): This can be relaxed: constant globals
667 // with compatible alignment and the same contents may be merged as long as
668 // the globals occupy the same number of tag granules (i.e. `size_a / 16 ==
669 // size_b / 16`).
670 if (GV.isTagged())
671 continue;
672
673 Type *Ty = GV.getValueType();
674 if (DL.getTypeAllocSize(Ty) < Opt.MaxOffset) {
675 if (TM &&
677 BSSGlobals[{AddressSpace, Section}].push_back(&GV);
678 else if (GV.isConstant())
679 ConstGlobals[{AddressSpace, Section}].push_back(&GV);
680 else
681 Globals[{AddressSpace, Section}].push_back(&GV);
682 }
683 }
684
685 for (auto &P : Globals)
686 if (P.second.size() > 1)
687 Changed |= doMerge(P.second, M, false, P.first.first);
688
689 for (auto &P : BSSGlobals)
690 if (P.second.size() > 1)
691 Changed |= doMerge(P.second, M, false, P.first.first);
692
694 for (auto &P : ConstGlobals)
695 if (P.second.size() > 1)
696 Changed |= doMerge(P.second, M, true, P.first.first);
697
698 return Changed;
699}
700
702 bool OnlyOptimizeForSize,
703 bool MergeExternalByDefault) {
704 bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
705 MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
706 return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal);
707}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
#define DEBUG_TYPE
static cl::opt< bool > GlobalMergeIgnoreSingleUse("global-merge-ignore-single-use", cl::Hidden, cl::desc("Improve global merge pass to ignore globals only used alone"), cl::init(true))
static cl::opt< bool > GlobalMergeGroupByUse("global-merge-group-by-use", cl::Hidden, cl::desc("Improve global merge pass to look at uses"), cl::init(true))
static cl::opt< bool > EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), cl::init(false))
static cl::opt< unsigned > GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden, cl::desc("Set maximum offset for global merge pass"), cl::init(0))
static cl::opt< bool > EnableGlobalMerge("enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass"), cl::init(true))
static cl::opt< cl::boolOrDefault > EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"))
Hexagon Common GEP
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
Module.h This file contains the declarations for the Module class.
IntegerType * Int32Ty
#define P(N)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This defines the Use class.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition: BitVector.h:300
bool back() const
Return the last element in the vector.
Definition: BitVector.h:456
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition: BitVector.h:308
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1579
ConstantArray - Constant Array Declarations.
Definition: Constants.h:408
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1002
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition: Constants.h:1213
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1300
This is an important base class in LLVM.
Definition: Constant.h:41
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:674
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:518
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:118
bool hasExternalLinkage() const
Definition: GlobalValue.h:510
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:274
bool isTagged() const
Definition: GlobalValue.h:364
void setDLLStorageClass(DLLStorageClassTypes C)
Definition: GlobalValue.h:284
DLLStorageClassTypes
Storage classes of global values for PE targets.
Definition: GlobalValue.h:73
bool hasInternalLinkage() const
Definition: GlobalValue.h:525
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:294
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition: GlobalValue.h:66
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:254
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:51
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:60
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
Type * getValueType() const
Definition: GlobalValue.h:296
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool hasImplicitSection() const
Check if section name is present.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:800
void push_back(MachineInstr *MI)
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
Definition: Pass.h:119
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:622
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:651
Class to represent struct types.
Definition: DerivedTypes.h:216
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
static SectionKind getKindForGlobal(const GlobalObject *GO, const TargetMachine &TM)
Classify the specified global variable into a set of target independent categories embodied in Sectio...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:716
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Expected< const typename ELFT::Shdr * > getSection(typename ELFT::ShdrRange Sections, uint32_t Index)
Definition: ELF.h:485
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
void stable_sort(R &&Range)
Definition: STLExtras.h:1975
AddressSpace
Definition: NVPTXBaseInfo.h:21
void initializeGlobalMergePass(PassRegistry &)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
Pass * createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, bool OnlyOptimizeForSize=false, bool MergeExternalByDefault=false)
GlobalMerge - This pass merges internal (by default) globals into structs to enable reuse of a base p...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:822
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
bool SizeOnly
Whether we should try to optimize for size only.
Definition: GlobalMerge.h:33
bool MergeExternal
Whether we should merge global variables that have external linkage.
Definition: GlobalMerge.h:28